1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
|
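Description: Update encoding tests to expect the output of newer chardet
 TestGuessEncoding now also accepts chardet 5.x, which reports ISO-8859-9
 just like 3.x and 4.x do. TestGuessEncodings expects "TIS-620" as the
 lowest-confidence result and additionally expects "Johab" and "MacRoman"
 in the set of guessed encodings when chardet is 5.1.0 or newer.
 .
 Note: the 5.1.0 check compares chardet.__version__ as a plain string, so
 it orders versions lexicographically (e.g. "10.0.0" sorts before "5.1.0").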
Origin: upstream, commits 50c49a5f and 86a42ecd
Author: Dominik Wombacher
Forwarded: not-needed
--- a/tests/test_pagure_lib_encoding_utils.py
+++ b/tests/test_pagure_lib_encoding_utils.py
@@ -40,7 +40,7 @@ class TestGuessEncoding(unittest.TestCas
"""
Test that strings that could be UTF-8 or ISO-8859-* result in UTF-8.
- python-chardet-3.0.4-2.fc27.noarch detects it as ISO-8859-9
+ python-chardet-3.0.4-2.fc27.noarch and above detect it as ISO-8859-9
python-chardet-2.2.1-1.el7_1.noarch detects it as ISO-8859-2
"""
data = "Ĺ abata".encode("utf-8")
@@ -50,7 +50,7 @@ class TestGuessEncoding(unittest.TestCas
self.assertEqual(result, "WINDOWS-1250")
else:
self.assertEqual(result, "utf-8")
- if chardet.__version__[0] in ("3", "4"):
+ if chardet.__version__[0] in ("3", "4", "5"):
self.assertEqual(chardet_result["encoding"], "ISO-8859-9")
else:
self.assertEqual(chardet_result["encoding"], "ISO-8859-2")
@@ -75,7 +75,11 @@ class TestGuessEncodings(unittest.TestCa
# The first three have different confidence values
expexted_list = ["utf-8", "ISO-8859-9", "ISO-8859-1"]
# This is the one with the least confidence
- self.assertEqual(result[-1].encoding, "windows-1255")
+ print(result)
+ if chardet.__version__ >= '5.1.0':
+ self.assertEqual(result[-1].encoding, "TIS-620")
+ else:
+ self.assertEqual(result[-1].encoding, "windows-1255")
self.assertListEqual(
[encoding.encoding for encoding in result][:3],
expexted_list,
@@ -83,30 +87,58 @@ class TestGuessEncodings(unittest.TestCa
# The values in the middle of the list all have the same confidence
# value and can't be sorted reliably: use sets.
- expected_list = sorted(
- [
- "utf-8",
- "ISO-8859-9",
- "ISO-8859-1",
- "MacCyrillic",
- "IBM866",
- "TIS-620",
- "EUC-JP",
- "EUC-KR",
- "GB2312",
- "KOI8-R",
- "Big5",
- "IBM855",
- "ISO-8859-7",
- "SHIFT_JIS",
- "windows-1253",
- "CP949",
- "EUC-TW",
- "ISO-8859-5",
- "windows-1251",
- "windows-1255",
- ]
- )
+ if chardet.__version__ >= '5.1.0':
+ expected_list = sorted(
+ [
+ "utf-8",
+ "ISO-8859-9",
+ "ISO-8859-1",
+ "MacCyrillic",
+ "IBM866",
+ "TIS-620",
+ "EUC-JP",
+ "EUC-KR",
+ "GB2312",
+ "KOI8-R",
+ "Big5",
+ "IBM855",
+ "ISO-8859-7",
+ "SHIFT_JIS",
+ "windows-1253",
+ "CP949",
+ "EUC-TW",
+ "ISO-8859-5",
+ "windows-1251",
+ "windows-1255",
+ "Johab", # Added in 5.0.0
+ "MacRoman", # Added in 5.1.0
+ ]
+ )
+ else:
+ expected_list = sorted(
+ [
+ "utf-8",
+ "ISO-8859-9",
+ "ISO-8859-1",
+ "MacCyrillic",
+ "IBM866",
+ "TIS-620",
+ "EUC-JP",
+ "EUC-KR",
+ "GB2312",
+ "KOI8-R",
+ "Big5",
+ "IBM855",
+ "ISO-8859-7",
+ "SHIFT_JIS",
+ "windows-1253",
+ "CP949",
+ "EUC-TW",
+ "ISO-8859-5",
+ "windows-1251",
+ "windows-1255",
+ ]
+ )
self.assertListEqual(
sorted(set([encoding.encoding for encoding in result])),
expected_list,
|