File: 0031-chardet-encoding-list.patch

package info (click to toggle)
pagure 5.14.1%2Bdfsg-8
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 31,008 kB
  • sloc: python: 125,295; javascript: 22,012; makefile: 208; sh: 191
file content (122 lines) | stat: -rw-r--r-- 5,113 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
Description: Update encoding tests to expect the output of newer chardet (5.x)

Origin: upstream 50c49a5f + 86a42ecd
Author: Dominik Wombacher
Forwarded: not-needed

--- a/tests/test_pagure_lib_encoding_utils.py
+++ b/tests/test_pagure_lib_encoding_utils.py
@@ -40,7 +40,7 @@ class TestGuessEncoding(unittest.TestCas
         """
         Test that strings that could be UTF-8 or ISO-8859-* result in UTF-8.
 
-        python-chardet-3.0.4-2.fc27.noarch detects it as ISO-8859-9
+        python-chardet-3.0.4-2.fc27.noarch and above detect it as ISO-8859-9
         python-chardet-2.2.1-1.el7_1.noarch detects it as ISO-8859-2
         """
         data = "Ĺ abata".encode("utf-8")
@@ -50,7 +50,7 @@ class TestGuessEncoding(unittest.TestCas
             self.assertEqual(result, "WINDOWS-1250")
         else:
             self.assertEqual(result, "utf-8")
-            if chardet.__version__[0] in ("3", "4"):
+            if chardet.__version__[0] in ("3", "4", "5"):
                 self.assertEqual(chardet_result["encoding"], "ISO-8859-9")
             else:
                 self.assertEqual(chardet_result["encoding"], "ISO-8859-2")
@@ -75,7 +75,11 @@ class TestGuessEncodings(unittest.TestCa
                 # The first three have different confidence values
                 expexted_list = ["utf-8", "ISO-8859-9", "ISO-8859-1"]
                 # This is the one with the least confidence
-                self.assertEqual(result[-1].encoding, "windows-1255")
+                print(result)
+                if chardet.__version__ >= '5.1.0':
+                    self.assertEqual(result[-1].encoding, "TIS-620")
+                else:
+                    self.assertEqual(result[-1].encoding, "windows-1255")
                 self.assertListEqual(
                     [encoding.encoding for encoding in result][:3],
                     expexted_list,
@@ -83,30 +87,58 @@ class TestGuessEncodings(unittest.TestCa
 
                 # The values in the middle of the list all have the same confidence
                 # value and can't be sorted reliably: use sets.
-                expected_list = sorted(
-                    [
-                        "utf-8",
-                        "ISO-8859-9",
-                        "ISO-8859-1",
-                        "MacCyrillic",
-                        "IBM866",
-                        "TIS-620",
-                        "EUC-JP",
-                        "EUC-KR",
-                        "GB2312",
-                        "KOI8-R",
-                        "Big5",
-                        "IBM855",
-                        "ISO-8859-7",
-                        "SHIFT_JIS",
-                        "windows-1253",
-                        "CP949",
-                        "EUC-TW",
-                        "ISO-8859-5",
-                        "windows-1251",
-                        "windows-1255",
-                    ]
-                )
+                if chardet.__version__ >= '5.1.0':
+                    expected_list = sorted(
+                        [
+                            "utf-8",
+                            "ISO-8859-9",
+                            "ISO-8859-1",
+                            "MacCyrillic",
+                            "IBM866",
+                            "TIS-620",
+                            "EUC-JP",
+                            "EUC-KR",
+                            "GB2312",
+                            "KOI8-R",
+                            "Big5",
+                            "IBM855",
+                            "ISO-8859-7",
+                            "SHIFT_JIS",
+                            "windows-1253",
+                            "CP949",
+                            "EUC-TW",
+                            "ISO-8859-5",
+                            "windows-1251",
+                            "windows-1255",
+                            "Johab",  # Added in 5.0.0
+                            "MacRoman",  # Added in 5.1.0
+                        ]
+                    )
+                else:
+                    expected_list = sorted(
+                        [
+                            "utf-8",
+                            "ISO-8859-9",
+                            "ISO-8859-1",
+                            "MacCyrillic",
+                            "IBM866",
+                            "TIS-620",
+                            "EUC-JP",
+                            "EUC-KR",
+                            "GB2312",
+                            "KOI8-R",
+                            "Big5",
+                            "IBM855",
+                            "ISO-8859-7",
+                            "SHIFT_JIS",
+                            "windows-1253",
+                            "CP949",
+                            "EUC-TW",
+                            "ISO-8859-5",
+                            "windows-1251",
+                            "windows-1255",
+                        ]
+                    )
                 self.assertListEqual(
                     sorted(set([encoding.encoding for encoding in result])),
                     expected_list,