1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
|
Description: HDF5 and Stata I/O are broken on some architectures
Fix some of the issues, warn on use, and xfail the tests for the remainder.
On armhf, TestHDF5Store::test*encoding crashes only intermittently
(1.1.3+dfsg-1 passed at build time but failed in autopkgtest).
HDF5 and Stata I/O are known to fail on big-endian architectures.
Stata I/O also fails on qemu-ppc64el, but not on real ppc64el hardware.
Author: Andreas Tille <tille@debian.org>, Graham Inggs <ginggs@debian.org>, Yaroslav Halchenko <debian@onerussian.com>, Rebecca N. Palmer <rebecca_palmer@zoho.com>
Bug-Debian: https://bugs.debian.org/877419
Forwarded: no
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -27,6 +27,10 @@ from typing import (
overload,
)
import warnings
+import platform
+import re
+from pandas.compat import is_platform_little_endian
+warn_hdf_platform = "Non-x86 system detected, HDF(5) format I/O may give wrong results (particularly on files created with older versions) or crash - https://bugs.debian.org/877419" if not bool(re.match('i.?86|x86',platform.uname()[4])) else False
import numpy as np
@@ -552,6 +556,8 @@ class HDFStore:
fletcher32: bool = False,
**kwargs,
) -> None:
+ if warn_hdf_platform:
+ warnings.warn(warn_hdf_platform)
if "format" in kwargs:
raise ValueError("format is not a defined argument for HDFStore")
@@ -773,7 +779,10 @@ class HDFStore:
self._handle.flush()
if fsync:
with suppress(OSError):
- os.fsync(self._handle.fileno())
+ if is_platform_little_endian():
+ os.fsync(self._handle.fileno())
+ else:
+ os.sync() # due to a pytables bad-cast bug, fileno is invalid on 64-bit big-endian#
def get(self, key: str):
"""
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -28,6 +28,9 @@ from typing import (
cast,
)
import warnings
+import platform
+import re
+warn_stata_platform = "Non-x86 system detected, Stata format I/O may give wrong results (particularly on strings) - https://bugs.debian.org/877419" if not bool(re.match('i.?86|x86',platform.uname()[4])) else False
from dateutil.relativedelta import relativedelta
import numpy as np
@@ -970,6 +973,8 @@ class StataParser:
# NOTE: the byte type seems to be reserved for categorical variables
# with a label, but the underlying variable is -127 to 100
# we're going to drop the label and cast to int
+ if warn_stata_platform:
+ warnings.warn(warn_stata_platform)
self.DTYPE_MAP = dict(
list(zip(range(1, 245), [np.dtype("a" + str(i)) for i in range(1, 245)]))
+ [
--- a/pandas/tests/io/pytables/test_file_handling.py
+++ b/pandas/tests/io/pytables/test_file_handling.py
@@ -22,6 +22,10 @@ from pandas.tests.io.pytables.common imp
ensure_clean_store,
tables,
)
+import platform
+import re
+import sys
+is_crashing_arch=bool((platform.uname()[4].startswith('arm') or platform.uname()[4].startswith('aarch')) and sys.maxsize<2**33) # meant for armhf, though this form will also skip on armel - uname = kernel arch
from pandas.io import pytables as pytables
from pandas.io.pytables import Term
@@ -263,6 +267,7 @@ def test_complibs(setup_path):
h5table.close()
+@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
@pytest.mark.skipif(
not is_platform_little_endian(), reason="reason platform is not little endian"
)
@@ -296,6 +301,7 @@ def test_encoding(setup_path):
],
)
@pytest.mark.parametrize("dtype", ["category", object])
+@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
def test_latin_encoding(setup_path, dtype, val):
enc = "latin-1"
nan_rep = ""
--- a/pandas/tests/io/pytables/test_append.py
+++ b/pandas/tests/io/pytables/test_append.py
@@ -23,6 +23,10 @@ from pandas.tests.io.pytables.common imp
ensure_clean_path,
ensure_clean_store,
)
+import platform
+import re
+import sys
+is_crashing_arch=bool((platform.uname()[4].startswith('arm') or platform.uname()[4].startswith('aarch')) and sys.maxsize<2**33) # meant for armhf, though this form will also skip on armel - uname = kernel arch
pytestmark = pytest.mark.single_cpu
@@ -276,6 +280,7 @@ def test_append_all_nans(setup_path):
tm.assert_frame_equal(store["df2"], df)
+@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
def test_append_frame_column_oriented(setup_path):
with ensure_clean_store(setup_path) as store:
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -39,6 +39,10 @@ from pandas.io.pytables import (
HDFStore,
read_hdf,
)
+import platform
+import re
+import sys
+is_crashing_arch=bool((platform.uname()[4].startswith('arm') or platform.uname()[4].startswith('aarch')) and sys.maxsize<2**33) # meant for armhf, though this form will also skip on armel - uname = kernel arch
pytestmark = pytest.mark.single_cpu
@@ -790,6 +794,7 @@ def test_start_stop_fixed(setup_path):
df.iloc[8:10, -2] = np.nan
+@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
def test_select_filter_corner(setup_path):
df = DataFrame(np.random.randn(50, 100))
--- a/pandas/tests/io/pytables/test_read.py
+++ b/pandas/tests/io/pytables/test_read.py
@@ -5,7 +5,7 @@ import numpy as np
import pytest
from pandas._libs.tslibs import Timestamp
-from pandas.compat import is_platform_windows
+from pandas.compat import is_platform_windows, is_platform_little_endian
import pandas as pd
from pandas import (
@@ -155,6 +155,7 @@ def test_pytables_native2_read(datapath)
assert isinstance(d1, DataFrame)
+@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
def test_legacy_table_fixed_format_read_py2(datapath):
# GH 24510
# legacy table with fixed format written in Python 2
@@ -170,6 +171,7 @@ def test_legacy_table_fixed_format_read_
tm.assert_frame_equal(expected, result)
+@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
def test_legacy_table_fixed_format_read_datetime_py2(datapath):
# GH 31750
# legacy table with fixed format and datetime64 column written in Python 2
@@ -319,6 +321,7 @@ def test_read_hdf_series_mode_r(format,
tm.assert_series_equal(result, series)
+@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
def test_read_py2_hdf_file_in_py3(datapath):
# GH 16781
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -36,6 +36,8 @@ from pandas.io.stata import (
read_stata,
)
+from pandas.compat import is_platform_little_endian
+pytestmark = pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of test_stata on non-little endian",strict=False)
@pytest.fixture
def mixed_frame():
@@ -148,7 +150,7 @@ class TestStata:
# )
# Remove resource warnings
- w = [x for x in w if x.category is UserWarning]
+ w = [x for x in w if x.category is UserWarning and not "Non-x86 system detected" in str(x.message)]
# should get warning for each call to read_dta
assert len(w) == 3
@@ -414,7 +416,7 @@ class TestStata:
warnings.simplefilter("always", InvalidColumnName)
original.to_stata(path, convert_dates=None, version=version)
# should get a warning for that format.
- assert len(w) == 1
+ assert len([x for x in w if not "Non-x86 system detected" in str(x.message)]) == 1
written_and_read_again = self.read_dta(path)
tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted)
@@ -1759,8 +1761,9 @@ the string values returned are correct."
encoded = read_stata(
datapath("io", "data", "stata", "stata1_encoding_118.dta")
)
- assert len(w) == 151
- assert w[0].message.args[0] == msg
+ w2 = [x for x in w if not "Non-x86 system detected" in str(x.message)]
+ assert len(w2) == 151
+ assert w2[0].message.args[0] == msg
expected = DataFrame([["Düsseldorf"]] * 151, columns=["kreis1849"])
tm.assert_frame_equal(encoded, expected)
--- a/pandas/tests/io/pytables/test_timezones.py
+++ b/pandas/tests/io/pytables/test_timezones.py
@@ -8,6 +8,7 @@ import pytest
from pandas._libs.tslibs.timezones import maybe_get_tz
import pandas.util._test_decorators as td
+from pandas.compat import is_platform_little_endian
import pandas as pd
from pandas import (
@@ -304,6 +305,7 @@ def test_store_timezone(setup_path):
tm.assert_frame_equal(result, df)
+@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
def test_legacy_datetimetz_object(datapath):
# legacy from < 0.17.0
# 8260
@@ -356,6 +358,7 @@ def test_read_with_where_tz_aware_index(
tm.assert_frame_equal(result, expected)
+@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
def test_py2_created_with_datetimez(datapath):
# The test HDF5 file was created in Python 2, but could not be read in
# Python 3.
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -16,7 +16,7 @@ import tempfile
import pytest
-from pandas.compat import is_platform_windows
+from pandas.compat import is_platform_windows, is_platform_little_endian
import pandas.util._test_decorators as td
import pandas as pd
@@ -301,11 +301,11 @@ Look,a snake,🐍"""
"pyarrow",
("io", "data", "feather", "feather-0_3_1.feather"),
),
- (
+ pytest.param(
pd.read_hdf,
"tables",
("io", "data", "legacy_hdf", "datetimetz_object.h5"),
- ),
+ marks=pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)),
(pd.read_stata, "os", ("io", "data", "stata", "stata10_115.dta")),
(pd.read_sas, "os", ("io", "sas", "data", "test1.sas7bdat")),
(pd.read_json, "os", ("io", "json", "data", "tsframe_v012.json")),
--- a/pandas/_testing/_warnings.py
+++ b/pandas/_testing/_warnings.py
@@ -13,6 +13,7 @@ from typing import (
cast,
)
import warnings
+import platform
@contextmanager
@@ -178,6 +179,8 @@ def _assert_caught_no_extra_warnings(
# due to these open files.
if any("matplotlib" in mod for mod in sys.modules):
continue
+ if actual_warning.category==UserWarning and "Non-x86 system detected" in str(actual_warning.message) and not bool(re.match('i.?86|x86',platform.uname()[4])):
+ continue
extra_warnings.append(
(
|