File: test_mio.py

package info (click to toggle)
python-scipy 0.18.1-2
links: PTS, VCS
area: main
in suites: stretch
size: 75,464 kB
ctags: 79,406
sloc: python: 143,495; cpp: 89,357; fortran: 81,650; ansic: 79,778; makefile: 364; sh: 265
file content (1232 lines) | stat: -rw-r--r-- 42,155 bytes
#!/usr/bin/env python
# -*- coding: latin-1 -*-
''' Nose test generators

Need function load / save / roundtrip tests

'''
from __future__ import division, print_function, absolute_import

import os
from os.path import join as pjoin, dirname
from glob import glob
from io import BytesIO
from tempfile import mkdtemp

from scipy._lib.six import u, text_type, string_types

import warnings
import shutil
import gzip

from numpy.testing import (assert_array_equal, assert_array_almost_equal,
                           assert_equal, assert_raises, run_module_suite,
                           assert_)

import numpy as np
from numpy import array
import scipy.sparse as SP

import scipy.io.matlab.byteordercodes as boc
from scipy.io.matlab.miobase import matdims, MatWriteError, MatReadError
from scipy.io.matlab.mio import (mat_reader_factory, loadmat, savemat, whosmat)
from scipy.io.matlab.mio5 import (MatlabObject, MatFile5Writer, MatFile5Reader,
                                  MatlabFunction, varmats_from_mat,
                                  to_writeable, EmptyStructMarker)
from scipy.io.matlab import mio5_params as mio5p

test_data_path = pjoin(dirname(__file__), 'data')


def mlarr(*args, **kwargs):
    """Convenience function to return matlab-compatible 2D array."""
    arr = np.array(*args, **kwargs)
    arr.shape = matdims(arr)
    return arr

# Define cases to test
theta = np.pi/4*np.arange(9,dtype=float).reshape(1,9)
case_table4 = [
    {'name': 'double',
     'classes': {'testdouble': 'double'},
     'expected': {'testdouble': theta}
     }]
case_table4.append(
    {'name': 'string',
     'classes': {'teststring': 'char'},
     'expected': {'teststring':
                  array([u('"Do nine men interpret?" "Nine men," I nod.')])}
     })
case_table4.append(
    {'name': 'complex',
     'classes': {'testcomplex': 'double'},
     'expected': {'testcomplex': np.cos(theta) + 1j*np.sin(theta)}
     })
A = np.zeros((3,5))
A[0] = list(range(1,6))
A[:,0] = list(range(1,4))
case_table4.append(
    {'name': 'matrix',
     'classes': {'testmatrix': 'double'},
     'expected': {'testmatrix': A},
     })
case_table4.append(
    {'name': 'sparse',
     'classes': {'testsparse': 'sparse'},
     'expected': {'testsparse': SP.coo_matrix(A)},
     })
B = A.astype(complex)
B[0,0] += 1j
case_table4.append(
    {'name': 'sparsecomplex',
     'classes': {'testsparsecomplex': 'sparse'},
     'expected': {'testsparsecomplex': SP.coo_matrix(B)},
     })
case_table4.append(
    {'name': 'multi',
     'classes': {'theta': 'double', 'a': 'double'},
     'expected': {'theta': theta, 'a': A},
     })
case_table4.append(
    {'name': 'minus',
     'classes': {'testminus': 'double'},
     'expected': {'testminus': mlarr(-1)},
     })
case_table4.append(
    {'name': 'onechar',
     'classes': {'testonechar': 'char'},
     'expected': {'testonechar': array([u('r')])},
     })
# Cell arrays stored as object arrays
CA = mlarr((  # tuple for object array creation
        [],
        mlarr([1]),
        mlarr([[1,2]]),
        mlarr([[1,2,3]])), dtype=object).reshape(1,-1)
CA[0,0] = array(
    [u('This cell contains this string and 3 arrays of increasing length')])
case_table5 = [
    {'name': 'cell',
     'classes': {'testcell': 'cell'},
     'expected': {'testcell': CA}}]
CAE = mlarr((  # tuple for object array creation
    mlarr(1),
    mlarr(2),
    mlarr([]),
    mlarr([]),
    mlarr(3)), dtype=object).reshape(1,-1)
objarr = np.empty((1,1),dtype=object)
objarr[0,0] = mlarr(1)
case_table5.append(
    {'name': 'scalarcell',
     'classes': {'testscalarcell': 'cell'},
     'expected': {'testscalarcell': objarr}
     })
case_table5.append(
    {'name': 'emptycell',
     'classes': {'testemptycell': 'cell'},
     'expected': {'testemptycell': CAE}})
case_table5.append(
    {'name': 'stringarray',
     'classes': {'teststringarray': 'char'},
     'expected': {'teststringarray': array(
    [u('one  '), u('two  '), u('three')])},
     })
case_table5.append(
    {'name': '3dmatrix',
     'classes': {'test3dmatrix': 'double'},
     'expected': {
    'test3dmatrix': np.transpose(np.reshape(list(range(1,25)), (4,3,2)))}
     })
st_sub_arr = array([np.sqrt(2),np.exp(1),np.pi]).reshape(1,3)
dtype = [(n, object) for n in ['stringfield', 'doublefield', 'complexfield']]
st1 = np.zeros((1,1), dtype)
st1['stringfield'][0,0] = array([u('Rats live on no evil star.')])
st1['doublefield'][0,0] = st_sub_arr
st1['complexfield'][0,0] = st_sub_arr * (1 + 1j)
case_table5.append(
    {'name': 'struct',
     'classes': {'teststruct': 'struct'},
     'expected': {'teststruct': st1}
     })
CN = np.zeros((1,2), dtype=object)
CN[0,0] = mlarr(1)
CN[0,1] = np.zeros((1,3), dtype=object)
CN[0,1][0,0] = mlarr(2, dtype=np.uint8)
CN[0,1][0,1] = mlarr([[3]], dtype=np.uint8)
CN[0,1][0,2] = np.zeros((1,2), dtype=object)
CN[0,1][0,2][0,0] = mlarr(4, dtype=np.uint8)
CN[0,1][0,2][0,1] = mlarr(5, dtype=np.uint8)
case_table5.append(
    {'name': 'cellnest',
     'classes': {'testcellnest': 'cell'},
     'expected': {'testcellnest': CN},
     })
st2 = np.empty((1,1), dtype=[(n, object) for n in ['one', 'two']])
st2[0,0]['one'] = mlarr(1)
st2[0,0]['two'] = np.empty((1,1), dtype=[('three', object)])
st2[0,0]['two'][0,0]['three'] = array([u('number 3')])
case_table5.append(
    {'name': 'structnest',
     'classes': {'teststructnest': 'struct'},
     'expected': {'teststructnest': st2}
     })
a = np.empty((1,2), dtype=[(n, object) for n in ['one', 'two']])
a[0,0]['one'] = mlarr(1)
a[0,0]['two'] = mlarr(2)
a[0,1]['one'] = array([u('number 1')])
a[0,1]['two'] = array([u('number 2')])
case_table5.append(
    {'name': 'structarr',
     'classes': {'teststructarr': 'struct'},
     'expected': {'teststructarr': a}
     })
ODT = np.dtype([(n, object) for n in
                 ['expr', 'inputExpr', 'args',
                  'isEmpty', 'numArgs', 'version']])
MO = MatlabObject(np.zeros((1,1), dtype=ODT), 'inline')
m0 = MO[0,0]
m0['expr'] = array([u('x')])
m0['inputExpr'] = array([u(' x = INLINE_INPUTS_{1};')])
m0['args'] = array([u('x')])
m0['isEmpty'] = mlarr(0)
m0['numArgs'] = mlarr(1)
m0['version'] = mlarr(1)
case_table5.append(
    {'name': 'object',
     'classes': {'testobject': 'object'},
     'expected': {'testobject': MO}
     })
fp_u_str = open(pjoin(test_data_path, 'japanese_utf8.txt'), 'rb')
u_str = fp_u_str.read().decode('utf-8')
fp_u_str.close()
case_table5.append(
    {'name': 'unicode',
     'classes': {'testunicode': 'char'},
    'expected': {'testunicode': array([u_str])}
     })
case_table5.append(
    {'name': 'sparse',
     'classes': {'testsparse': 'sparse'},
     'expected': {'testsparse': SP.coo_matrix(A)},
     })
case_table5.append(
    {'name': 'sparsecomplex',
     'classes': {'testsparsecomplex': 'sparse'},
     'expected': {'testsparsecomplex': SP.coo_matrix(B)},
     })
case_table5.append(
    {'name': 'bool',
     'classes': {'testbools': 'logical'},
     'expected': {'testbools':
                  array([[True], [False]])},
     })

case_table5_rt = case_table5[:]
# Inline functions can't be concatenated in matlab, so RT only
case_table5_rt.append(
    {'name': 'objectarray',
     'classes': {'testobjectarray': 'object'},
     'expected': {'testobjectarray': np.repeat(MO, 2).reshape(1,2)}})


def types_compatible(var1, var2):
    """Check if types are same or compatible.

    0-D numpy scalars are compatible with bare python scalars.
    """
    type1 = type(var1)
    type2 = type(var2)
    if type1 is type2:
        return True
    if type1 is np.ndarray and var1.shape == ():
        return type(var1.item()) is type2
    if type2 is np.ndarray and var2.shape == ():
        return type(var2.item()) is type1
    return False


def _check_level(label, expected, actual):
    """ Check one level of a potentially nested array """
    if SP.issparse(expected):  # allow different types of sparse matrices
        assert_(SP.issparse(actual))
        assert_array_almost_equal(actual.todense(),
                                  expected.todense(),
                                  err_msg=label,
                                  decimal=5)
        return
    # Check types are as expected
    assert_(types_compatible(expected, actual),
            "Expected type %s, got %s at %s" %
            (type(expected), type(actual), label))
    # A field in a record array may not be an ndarray
    # A scalar from a record array will be type np.void
    if not isinstance(expected,
                      (np.void, np.ndarray, MatlabObject)):
        assert_equal(expected, actual)
        return
    # This is an ndarray-like thing
    assert_(expected.shape == actual.shape,
            msg='Expected shape %s, got %s at %s' % (expected.shape,
                                                     actual.shape,
                                                     label))
    ex_dtype = expected.dtype
    if ex_dtype.hasobject:  # array of objects
        if isinstance(expected, MatlabObject):
            assert_equal(expected.classname, actual.classname)
        for i, ev in enumerate(expected):
            level_label = "%s, [%d], " % (label, i)
            _check_level(level_label, ev, actual[i])
        return
    if ex_dtype.fields:  # probably recarray
        for fn in ex_dtype.fields:
            level_label = "%s, field %s, " % (label, fn)
            _check_level(level_label,
                         expected[fn], actual[fn])
        return
    if ex_dtype.type in (text_type,  # string or bool
                         np.unicode_,
                         np.bool_):
        assert_equal(actual, expected, err_msg=label)
        return
    # Something numeric
    assert_array_almost_equal(actual, expected, err_msg=label, decimal=5)


def _load_check_case(name, files, case):
    for file_name in files:
        matdict = loadmat(file_name, struct_as_record=True)
        label = "test %s; file %s" % (name, file_name)
        for k, expected in case.items():
            k_label = "%s, variable %s" % (label, k)
            assert_(k in matdict, "Missing key at %s" % k_label)
            _check_level(k_label, expected, matdict[k])


def _whos_check_case(name, files, case, classes):
    for file_name in files:
        label = "test %s; file %s" % (name, file_name)

        whos = whosmat(file_name)

        expected_whos = []
        for k, expected in case.items():
            expected_whos.append((k, expected.shape, classes[k]))

        whos.sort()
        expected_whos.sort()
        assert_equal(whos, expected_whos,
                     "%s: %r != %r" % (label, whos, expected_whos)
                     )


# Round trip tests
def _rt_check_case(name, expected, format):
    mat_stream = BytesIO()
    savemat(mat_stream, expected, format=format)
    mat_stream.seek(0)
    _load_check_case(name, [mat_stream], expected)


# generator for load tests
def test_load():
    for case in case_table4 + case_table5:
        name = case['name']
        expected = case['expected']
        filt = pjoin(test_data_path, 'test%s_*.mat' % name)
        files = glob(filt)
        assert_(len(files) > 0,
                "No files for test %s using filter %s" % (name, filt))
        yield _load_check_case, name, files, expected


# generator for whos tests
def test_whos():
    for case in case_table4 + case_table5:
        name = case['name']
        expected = case['expected']
        classes = case['classes']
        filt = pjoin(test_data_path, 'test%s_*.mat' % name)
        files = glob(filt)
        assert_(len(files) > 0,
                "No files for test %s using filter %s" % (name, filt))
        yield _whos_check_case, name, files, expected, classes


# generator for round trip tests
def test_round_trip():
    for case in case_table4 + case_table5_rt:
        case_table4_names = [case['name'] for case in case_table4]
        name = case['name'] + '_round_trip'
        expected = case['expected']
        for format in (['4', '5'] if case['name'] in case_table4_names else ['5']):
            yield _rt_check_case, name, expected, format


def test_gzip_simple():
    xdense = np.zeros((20,20))
    xdense[2,3] = 2.3
    xdense[4,5] = 4.5
    x = SP.csc_matrix(xdense)

    name = 'gzip_test'
    expected = {'x':x}
    format = '4'

    tmpdir = mkdtemp()
    try:
        fname = pjoin(tmpdir,name)
        mat_stream = gzip.open(fname,mode='wb')
        savemat(mat_stream, expected, format=format)
        mat_stream.close()

        mat_stream = gzip.open(fname,mode='rb')
        actual = loadmat(mat_stream, struct_as_record=True)
        mat_stream.close()
    finally:
        shutil.rmtree(tmpdir)

    assert_array_almost_equal(actual['x'].todense(),
                              expected['x'].todense(),
                              err_msg=repr(actual))


def test_multiple_open():
    # Ticket #1039, on Windows: check that files are not left open
    tmpdir = mkdtemp()
    try:
        x = dict(x=np.zeros((2, 2)))

        fname = pjoin(tmpdir, "a.mat")

        # Check that file is not left open
        savemat(fname, x)
        os.unlink(fname)
        savemat(fname, x)
        loadmat(fname)
        os.unlink(fname)

        # Check that stream is left open
        f = open(fname, 'wb')
        savemat(f, x)
        f.seek(0)
        f.close()

        f = open(fname, 'rb')
        loadmat(f)
        f.seek(0)
        f.close()
    finally:
        shutil.rmtree(tmpdir)


def test_mat73():
    # Check any hdf5 files raise an error
    filenames = glob(
        pjoin(test_data_path, 'testhdf5*.mat'))
    assert_(len(filenames) > 0)
    for filename in filenames:
        fp = open(filename, 'rb')
        assert_raises(NotImplementedError,
                      loadmat,
                      fp,
                      struct_as_record=True)
        fp.close()


def test_warnings():
    # This test is an echo of the previous behavior, which was to raise a
    # warning if the user triggered a search for mat files on the Python system
    # path.  We can remove the test in the next version after upcoming (0.13)
    fname = pjoin(test_data_path, 'testdouble_7.1_GLNX86.mat')
    with warnings.catch_warnings():
        warnings.simplefilter('error')
        # This should not generate a warning
        mres = loadmat(fname, struct_as_record=True)
        # This neither
        mres = loadmat(fname, struct_as_record=False)


def test_regression_653():
    # Saving a dictionary with only invalid keys used to raise an error. Now we
    # save this as an empty struct in matlab space.
    sio = BytesIO()
    savemat(sio, {'d':{1:2}}, format='5')
    back = loadmat(sio)['d']
    # Check we got an empty struct equivalent
    assert_equal(back.shape, (1,1))
    assert_equal(back.dtype, np.dtype(object))
    assert_(back[0,0] is None)


def test_structname_len():
    # Test limit for length of field names in structs
    lim = 31
    fldname = 'a' * lim
    st1 = np.zeros((1,1), dtype=[(fldname, object)])
    savemat(BytesIO(), {'longstruct': st1}, format='5')
    fldname = 'a' * (lim+1)
    st1 = np.zeros((1,1), dtype=[(fldname, object)])
    assert_raises(ValueError, savemat, BytesIO(),
                  {'longstruct': st1}, format='5')


def test_4_and_long_field_names_incompatible():
    # Long field names option not supported in 4
    my_struct = np.zeros((1,1),dtype=[('my_fieldname',object)])
    assert_raises(ValueError, savemat, BytesIO(),
                  {'my_struct':my_struct}, format='4', long_field_names=True)


def test_long_field_names():
    # Test limit for length of field names in structs
    lim = 63
    fldname = 'a' * lim
    st1 = np.zeros((1,1), dtype=[(fldname, object)])
    savemat(BytesIO(), {'longstruct': st1}, format='5',long_field_names=True)
    fldname = 'a' * (lim+1)
    st1 = np.zeros((1,1), dtype=[(fldname, object)])
    assert_raises(ValueError, savemat, BytesIO(),
                  {'longstruct': st1}, format='5',long_field_names=True)


def test_long_field_names_in_struct():
    # Regression test - long_field_names was erased if you passed a struct
    # within a struct
    lim = 63
    fldname = 'a' * lim
    cell = np.ndarray((1,2),dtype=object)
    st1 = np.zeros((1,1), dtype=[(fldname, object)])
    cell[0,0] = st1
    cell[0,1] = st1
    savemat(BytesIO(), {'longstruct': cell}, format='5',long_field_names=True)
    #
    # Check to make sure it fails with long field names off
    #
    assert_raises(ValueError, savemat, BytesIO(),
                  {'longstruct': cell}, format='5', long_field_names=False)


def test_cell_with_one_thing_in_it():
    # Regression test - make a cell array that's 1 x 2 and put two
    # strings in it.  It works. Make a cell array that's 1 x 1 and put
    # a string in it. It should work but, in the old days, it didn't.
    cells = np.ndarray((1,2),dtype=object)
    cells[0,0] = 'Hello'
    cells[0,1] = 'World'
    savemat(BytesIO(), {'x': cells}, format='5')

    cells = np.ndarray((1,1),dtype=object)
    cells[0,0] = 'Hello, world'
    savemat(BytesIO(), {'x': cells}, format='5')


def test_writer_properties():
    # Tests getting, setting of properties of matrix writer
    mfw = MatFile5Writer(BytesIO())
    yield assert_equal, mfw.global_vars, []
    mfw.global_vars = ['avar']
    yield assert_equal, mfw.global_vars, ['avar']
    yield assert_equal, mfw.unicode_strings, False
    mfw.unicode_strings = True
    yield assert_equal, mfw.unicode_strings, True
    yield assert_equal, mfw.long_field_names, False
    mfw.long_field_names = True
    yield assert_equal, mfw.long_field_names, True


def test_use_small_element():
    # Test whether we're using small data element or not
    sio = BytesIO()
    wtr = MatFile5Writer(sio)
    # First check size for no sde for name
    arr = np.zeros(10)
    wtr.put_variables({'aaaaa': arr})
    w_sz = len(sio.getvalue())
    # Check small name results in largish difference in size
    sio.truncate(0)
    sio.seek(0)
    wtr.put_variables({'aaaa': arr})
    yield assert_, w_sz - len(sio.getvalue()) > 4
    # Whereas increasing name size makes less difference
    sio.truncate(0)
    sio.seek(0)
    wtr.put_variables({'aaaaaa': arr})
    yield assert_, len(sio.getvalue()) - w_sz < 4


def test_save_dict():
    # Test that dict can be saved (as recarray), loaded as matstruct
    dict_types = ((dict, False),)
    try:
        from collections import OrderedDict
    except ImportError:
        pass
    else:
        dict_types += ((OrderedDict, True),)
    ab_exp = np.array([[(1, 2)]], dtype=[('a', object), ('b', object)])
    ba_exp = np.array([[(2, 1)]], dtype=[('b', object), ('a', object)])
    for dict_type, is_ordered in dict_types:
        # Initialize with tuples to keep order for OrderedDict
        d = dict_type([('a', 1), ('b', 2)])
        stream = BytesIO()
        savemat(stream, {'dict': d})
        stream.seek(0)
        vals = loadmat(stream)['dict']
        assert_equal(set(vals.dtype.names), set(['a', 'b']))
        if is_ordered:  # Input was ordered, output in ab order
            assert_array_equal(vals, ab_exp)
        else:  # Not ordered input, either order output
            if vals.dtype.names[0] == 'a':
                assert_array_equal(vals, ab_exp)
            else:
                assert_array_equal(vals, ba_exp)


def test_1d_shape():
    # New 5 behavior is 1D -> row vector
    arr = np.arange(5)
    for format in ('4', '5'):
        # Column is the default
        stream = BytesIO()
        savemat(stream, {'oned': arr}, format=format)
        vals = loadmat(stream)
        assert_equal(vals['oned'].shape, (1, 5))
        # can be explicitly 'column' for oned_as
        stream = BytesIO()
        savemat(stream, {'oned':arr},
                format=format,
                oned_as='column')
        vals = loadmat(stream)
        assert_equal(vals['oned'].shape, (5,1))
        # but different from 'row'
        stream = BytesIO()
        savemat(stream, {'oned':arr},
                format=format,
                oned_as='row')
        vals = loadmat(stream)
        assert_equal(vals['oned'].shape, (1,5))


def test_compression():
    arr = np.zeros(100).reshape((5,20))
    arr[2,10] = 1
    stream = BytesIO()
    savemat(stream, {'arr':arr})
    raw_len = len(stream.getvalue())
    vals = loadmat(stream)
    yield assert_array_equal, vals['arr'], arr
    stream = BytesIO()
    savemat(stream, {'arr':arr}, do_compression=True)
    compressed_len = len(stream.getvalue())
    vals = loadmat(stream)
    yield assert_array_equal, vals['arr'], arr
    yield assert_, raw_len > compressed_len
    # Concatenate, test later
    arr2 = arr.copy()
    arr2[0,0] = 1
    stream = BytesIO()
    savemat(stream, {'arr':arr, 'arr2':arr2}, do_compression=False)
    vals = loadmat(stream)
    yield assert_array_equal, vals['arr2'], arr2
    stream = BytesIO()
    savemat(stream, {'arr':arr, 'arr2':arr2}, do_compression=True)
    vals = loadmat(stream)
    yield assert_array_equal, vals['arr2'], arr2


def test_single_object():
    stream = BytesIO()
    savemat(stream, {'A':np.array(1, dtype=object)})


def test_skip_variable():
    # Test skipping over the first of two variables in a MAT file
    # using mat_reader_factory and put_variables to read them in.
    #
    # This is a regression test of a problem that's caused by
    # using the compressed file reader seek instead of the raw file
    # I/O seek when skipping over a compressed chunk.
    #
    # The problem arises when the chunk is large: this file has
    # a 256x256 array of random (uncompressible) doubles.
    #
    filename = pjoin(test_data_path,'test_skip_variable.mat')
    #
    # Prove that it loads with loadmat
    #
    d = loadmat(filename, struct_as_record=True)
    yield assert_, 'first' in d
    yield assert_, 'second' in d
    #
    # Make the factory
    #
    factory = mat_reader_factory(filename, struct_as_record=True)
    #
    # This is where the factory breaks with an error in MatMatrixGetter.to_next
    #
    d = factory.get_variables('second')
    yield assert_, 'second' in d
    factory.mat_stream.close()


def test_empty_struct():
    # ticket 885
    filename = pjoin(test_data_path,'test_empty_struct.mat')
    # before ticket fix, this would crash with ValueError, empty data
    # type
    d = loadmat(filename, struct_as_record=True)
    a = d['a']
    assert_equal(a.shape, (1,1))
    assert_equal(a.dtype, np.dtype(object))
    assert_(a[0,0] is None)
    stream = BytesIO()
    arr = np.array((), dtype='U')
    # before ticket fix, this used to give data type not understood
    savemat(stream, {'arr':arr})
    d = loadmat(stream)
    a2 = d['arr']
    assert_array_equal(a2, arr)


def test_save_empty_dict():
    # saving empty dict also gives empty struct
    stream = BytesIO()
    savemat(stream, {'arr': {}})
    d = loadmat(stream)
    a = d['arr']
    assert_equal(a.shape, (1,1))
    assert_equal(a.dtype, np.dtype(object))
    assert_(a[0,0] is None)


def assert_any_equal(output, alternatives):
    """ Assert `output` is equal to at least one element in `alternatives`
    """
    one_equal = False
    for expected in alternatives:
        if np.all(output == expected):
            one_equal = True
            break
    assert_(one_equal)


def test_to_writeable():
    # Test to_writeable function
    res = to_writeable(np.array([1]))  # pass through ndarrays
    assert_equal(res.shape, (1,))
    assert_array_equal(res, 1)
    # Dict fields can be written in any order
    expected1 = np.array([(1, 2)], dtype=[('a', '|O8'), ('b', '|O8')])
    expected2 = np.array([(2, 1)], dtype=[('b', '|O8'), ('a', '|O8')])
    alternatives = (expected1, expected2)
    assert_any_equal(to_writeable({'a':1,'b':2}), alternatives)
    # Fields with underscores discarded
    assert_any_equal(to_writeable({'a':1,'b':2, '_c':3}), alternatives)
    # Not-string fields discarded
    assert_any_equal(to_writeable({'a':1,'b':2, 100:3}), alternatives)
    # String fields that are valid Python identifiers discarded
    assert_any_equal(to_writeable({'a':1,'b':2, '99':3}), alternatives)
    # Object with field names is equivalent

    class klass(object):
        pass

    c = klass
    c.a = 1
    c.b = 2
    assert_any_equal(to_writeable(c), alternatives)
    # empty list and tuple go to empty array
    res = to_writeable([])
    assert_equal(res.shape, (0,))
    assert_equal(res.dtype.type, np.float64)
    res = to_writeable(())
    assert_equal(res.shape, (0,))
    assert_equal(res.dtype.type, np.float64)
    # None -> None
    assert_(to_writeable(None) is None)
    # String to strings
    assert_equal(to_writeable('a string').dtype.type, np.str_)
    # Scalars to numpy to numpy scalars
    res = to_writeable(1)
    assert_equal(res.shape, ())
    assert_equal(res.dtype.type, np.array(1).dtype.type)
    assert_array_equal(res, 1)
    # Empty dict returns EmptyStructMarker
    assert_(to_writeable({}) is EmptyStructMarker)
    # Object does not have (even empty) __dict__
    assert_(to_writeable(object()) is None)
    # Custom object does have empty __dict__, returns EmptyStructMarker

    class C(object):
        pass

    assert_(to_writeable(c()) is EmptyStructMarker)
    # dict keys with legal characters are convertible
    res = to_writeable({'a': 1})['a']
    assert_equal(res.shape, (1,))
    assert_equal(res.dtype.type, np.object_)
    # Only fields with illegal characters, falls back to EmptyStruct
    assert_(to_writeable({'1':1}) is EmptyStructMarker)
    assert_(to_writeable({'_a':1}) is EmptyStructMarker)
    # Unless there are valid fields, in which case structured array
    assert_equal(to_writeable({'1':1, 'f': 2}),
                 np.array([(2,)], dtype=[('f', '|O8')]))


def test_recarray():
    # check roundtrip of structured array
    dt = [('f1', 'f8'),
          ('f2', 'S10')]
    arr = np.zeros((2,), dtype=dt)
    arr[0]['f1'] = 0.5
    arr[0]['f2'] = 'python'
    arr[1]['f1'] = 99
    arr[1]['f2'] = 'not perl'
    stream = BytesIO()
    savemat(stream, {'arr': arr})
    d = loadmat(stream, struct_as_record=False)
    a20 = d['arr'][0,0]
    yield assert_equal, a20.f1, 0.5
    yield assert_equal, a20.f2, 'python'
    d = loadmat(stream, struct_as_record=True)
    a20 = d['arr'][0,0]
    yield assert_equal, a20['f1'], 0.5
    yield assert_equal, a20['f2'], 'python'
    # structs always come back as object types
    yield assert_equal, a20.dtype, np.dtype([('f1', 'O'),
                                             ('f2', 'O')])
    a21 = d['arr'].flat[1]
    yield assert_equal, a21['f1'], 99
    yield assert_equal, a21['f2'], 'not perl'


def test_save_object():
    class C(object):
        pass
    c = C()
    c.field1 = 1
    c.field2 = 'a string'
    stream = BytesIO()
    savemat(stream, {'c': c})
    d = loadmat(stream, struct_as_record=False)
    c2 = d['c'][0,0]
    assert_equal(c2.field1, 1)
    assert_equal(c2.field2, 'a string')
    d = loadmat(stream, struct_as_record=True)
    c2 = d['c'][0,0]
    assert_equal(c2['field1'], 1)
    assert_equal(c2['field2'], 'a string')


def test_read_opts():
    # tests if read is seeing option sets, at initialization and after
    # initialization
    arr = np.arange(6).reshape(1,6)
    stream = BytesIO()
    savemat(stream, {'a': arr})
    rdr = MatFile5Reader(stream)
    back_dict = rdr.get_variables()
    rarr = back_dict['a']
    assert_array_equal(rarr, arr)
    rdr = MatFile5Reader(stream, squeeze_me=True)
    assert_array_equal(rdr.get_variables()['a'], arr.reshape((6,)))
    rdr.squeeze_me = False
    assert_array_equal(rarr, arr)
    rdr = MatFile5Reader(stream, byte_order=boc.native_code)
    assert_array_equal(rdr.get_variables()['a'], arr)
    # inverted byte code leads to error on read because of swapped
    # header etc
    rdr = MatFile5Reader(stream, byte_order=boc.swapped_code)
    assert_raises(Exception, rdr.get_variables)
    rdr.byte_order = boc.native_code
    assert_array_equal(rdr.get_variables()['a'], arr)
    arr = np.array(['a string'])
    stream.truncate(0)
    stream.seek(0)
    savemat(stream, {'a': arr})
    rdr = MatFile5Reader(stream)
    assert_array_equal(rdr.get_variables()['a'], arr)
    rdr = MatFile5Reader(stream, chars_as_strings=False)
    carr = np.atleast_2d(np.array(list(arr.item()), dtype='U1'))
    assert_array_equal(rdr.get_variables()['a'], carr)
    rdr.chars_as_strings = True
    assert_array_equal(rdr.get_variables()['a'], arr)


def test_empty_string():
    # make sure reading empty string does not raise error
    estring_fname = pjoin(test_data_path, 'single_empty_string.mat')
    fp = open(estring_fname, 'rb')
    rdr = MatFile5Reader(fp)
    d = rdr.get_variables()
    fp.close()
    assert_array_equal(d['a'], np.array([], dtype='U1'))
    # empty string round trip.  Matlab cannot distiguish
    # between a string array that is empty, and a string array
    # containing a single empty string, because it stores strings as
    # arrays of char.  There is no way of having an array of char that
    # is not empty, but contains an empty string.
    stream = BytesIO()
    savemat(stream, {'a': np.array([''])})
    rdr = MatFile5Reader(stream)
    d = rdr.get_variables()
    assert_array_equal(d['a'], np.array([], dtype='U1'))
    stream.truncate(0)
    stream.seek(0)
    savemat(stream, {'a': np.array([], dtype='U1')})
    rdr = MatFile5Reader(stream)
    d = rdr.get_variables()
    assert_array_equal(d['a'], np.array([], dtype='U1'))
    stream.close()


def test_corrupted_data():
    import zlib
    for exc, fname in [(ValueError, 'corrupted_zlib_data.mat'),
                       (zlib.error, 'corrupted_zlib_checksum.mat')]:
        with open(pjoin(test_data_path, fname), 'rb') as fp:
            rdr = MatFile5Reader(fp)
            assert_raises(exc, rdr.get_variables)


def test_corrupted_data_check_can_be_disabled():
    with open(pjoin(test_data_path, 'corrupted_zlib_data.mat'), 'rb') as fp:
        rdr = MatFile5Reader(fp, verify_compressed_data_integrity=False)
        rdr.get_variables()


def test_read_both_endian():
    # make sure big- and little- endian data is read correctly
    for fname in ('big_endian.mat', 'little_endian.mat'):
        fp = open(pjoin(test_data_path, fname), 'rb')
        rdr = MatFile5Reader(fp)
        d = rdr.get_variables()
        fp.close()
        assert_array_equal(d['strings'],
                           np.array([['hello'],
                                     ['world']], dtype=object))
        assert_array_equal(d['floats'],
                           np.array([[2., 3.],
                                     [3., 4.]], dtype=np.float32))


def test_write_opposite_endian():
    # We don't support writing opposite endian .mat files, but we need to behave
    # correctly if the user supplies an other-endian numpy array to write out
    float_arr = np.array([[2., 3.],
                          [3., 4.]])
    int_arr = np.arange(6).reshape((2, 3))
    uni_arr = np.array(['hello', 'world'], dtype='U')
    stream = BytesIO()
    savemat(stream, {'floats': float_arr.byteswap().newbyteorder(),
                            'ints': int_arr.byteswap().newbyteorder(),
                            'uni_arr': uni_arr.byteswap().newbyteorder()})
    rdr = MatFile5Reader(stream)
    d = rdr.get_variables()
    assert_array_equal(d['floats'], float_arr)
    assert_array_equal(d['ints'], int_arr)
    assert_array_equal(d['uni_arr'], uni_arr)
    stream.close()


def test_logical_array():
    # The roundtrip test doesn't verify that we load the data up with the
    # correct (bool) dtype
    with open(pjoin(test_data_path, 'testbool_8_WIN64.mat'), 'rb') as fobj:
        rdr = MatFile5Reader(fobj, mat_dtype=True)
        d = rdr.get_variables()
    x = np.array([[True], [False]], dtype=np.bool_)
    assert_array_equal(d['testbools'], x)
    assert_equal(d['testbools'].dtype, x.dtype)


def test_logical_out_type():
    # Confirm that bool type written as uint8, uint8 class
    # See gh-4022
    stream = BytesIO()
    barr = np.array([False, True, False])
    savemat(stream, {'barray': barr})
    stream.seek(0)
    reader = MatFile5Reader(stream)
    reader.initialize_read()
    reader.read_file_header()
    hdr, _ = reader.read_var_header()
    assert_equal(hdr.mclass, mio5p.mxUINT8_CLASS)
    assert_equal(hdr.is_logical, True)
    var = reader.read_var_array(hdr, False)
    assert_equal(var.dtype.type, np.uint8)


def test_mat4_3d():
    # test behavior when writing 3D arrays to matlab 4 files
    stream = BytesIO()
    arr = np.arange(24).reshape((2,3,4))
    assert_raises(ValueError, savemat, stream, {'a': arr}, True, '4')


def test_func_read():
    func_eg = pjoin(test_data_path, 'testfunc_7.4_GLNX86.mat')
    fp = open(func_eg, 'rb')
    rdr = MatFile5Reader(fp)
    d = rdr.get_variables()
    fp.close()
    assert_(isinstance(d['testfunc'], MatlabFunction))
    stream = BytesIO()
    wtr = MatFile5Writer(stream)
    assert_raises(MatWriteError, wtr.put_variables, d)


def test_mat_dtype():
    double_eg = pjoin(test_data_path, 'testmatrix_6.1_SOL2.mat')
    fp = open(double_eg, 'rb')
    rdr = MatFile5Reader(fp, mat_dtype=False)
    d = rdr.get_variables()
    fp.close()
    yield assert_equal, d['testmatrix'].dtype.kind, 'u'

    fp = open(double_eg, 'rb')
    rdr = MatFile5Reader(fp, mat_dtype=True)
    d = rdr.get_variables()
    fp.close()
    yield assert_equal, d['testmatrix'].dtype.kind, 'f'


def test_sparse_in_struct():
    # reproduces bug found by DC where Cython code was insisting on
    # ndarray return type, but getting sparse matrix
    st = {'sparsefield': SP.coo_matrix(np.eye(4))}
    stream = BytesIO()
    savemat(stream, {'a':st})
    d = loadmat(stream, struct_as_record=True)
    yield assert_array_equal, d['a'][0,0]['sparsefield'].todense(), np.eye(4)


def test_mat_struct_squeeze():
    stream = BytesIO()
    in_d = {'st':{'one':1, 'two':2}}
    savemat(stream, in_d)
    # no error without squeeze
    out_d = loadmat(stream, struct_as_record=False)
    # previous error was with squeeze, with mat_struct
    out_d = loadmat(stream,
                    struct_as_record=False,
                    squeeze_me=True,
                    )


def test_scalar_squeeze():
    stream = BytesIO()
    in_d = {'scalar': [[0.1]], 'string': 'my name', 'st':{'one':1, 'two':2}}
    savemat(stream, in_d)
    out_d = loadmat(stream, squeeze_me=True)
    assert_(isinstance(out_d['scalar'], float))
    assert_(isinstance(out_d['string'], string_types))
    assert_(isinstance(out_d['st'], np.ndarray))


def test_str_round():
    # from report by Angus McMorland on mailing list 3 May 2010
    stream = BytesIO()
    in_arr = np.array(['Hello', 'Foob'])
    out_arr = np.array(['Hello', 'Foob '])
    savemat(stream, dict(a=in_arr))
    res = loadmat(stream)
    # resulted in ['HloolFoa', 'elWrdobr']
    assert_array_equal(res['a'], out_arr)
    stream.truncate(0)
    stream.seek(0)
    # Make Fortran ordered version of string
    in_str = in_arr.tostring(order='F')
    in_from_str = np.ndarray(shape=a.shape,
                             dtype=in_arr.dtype,
                             order='F',
                             buffer=in_str)
    savemat(stream, dict(a=in_from_str))
    assert_array_equal(res['a'], out_arr)
    # unicode save did lead to buffer too small error
    stream.truncate(0)
    stream.seek(0)
    in_arr_u = in_arr.astype('U')
    out_arr_u = out_arr.astype('U')
    savemat(stream, {'a': in_arr_u})
    res = loadmat(stream)
    assert_array_equal(res['a'], out_arr_u)


def test_fieldnames():
    # Check that field names are as expected
    stream = BytesIO()
    savemat(stream, {'a': {'a':1, 'b':2}})
    res = loadmat(stream)
    field_names = res['a'].dtype.names
    assert_equal(set(field_names), set(('a', 'b')))


def test_loadmat_varnames():
    # Test that we can get just one variable from a mat file using loadmat
    mat5_sys_names = ['__globals__',
                      '__header__',
                      '__version__']
    for eg_file, sys_v_names in (
        (pjoin(test_data_path, 'testmulti_4.2c_SOL2.mat'), []), (pjoin(
            test_data_path, 'testmulti_7.4_GLNX86.mat'), mat5_sys_names)):
        vars = loadmat(eg_file)
        assert_equal(set(vars.keys()), set(['a', 'theta'] + sys_v_names))
        vars = loadmat(eg_file, variable_names='a')
        assert_equal(set(vars.keys()), set(['a'] + sys_v_names))
        vars = loadmat(eg_file, variable_names=['a'])
        assert_equal(set(vars.keys()), set(['a'] + sys_v_names))
        vars = loadmat(eg_file, variable_names=['theta'])
        assert_equal(set(vars.keys()), set(['theta'] + sys_v_names))
        vars = loadmat(eg_file, variable_names=('theta',))
        assert_equal(set(vars.keys()), set(['theta'] + sys_v_names))
        vars = loadmat(eg_file, variable_names=[])
        assert_equal(set(vars.keys()), set(sys_v_names))
        vnames = ['theta']
        vars = loadmat(eg_file, variable_names=vnames)
        assert_equal(vnames, ['theta'])


def test_round_types():
    # Check that saving, loading preserves dtype in most cases
    arr = np.arange(10)
    stream = BytesIO()
    for dts in ('f8','f4','i8','i4','i2','i1',
                'u8','u4','u2','u1','c16','c8'):
        stream.truncate(0)
        stream.seek(0)  # needed for BytesIO in python 3
        savemat(stream, {'arr': arr.astype(dts)})
        vars = loadmat(stream)
        assert_equal(np.dtype(dts), vars['arr'].dtype)


def test_varmats_from_mat():
    # Make a mat file with several variables, write it, read it back
    names_vars = (('arr', mlarr(np.arange(10))),
                  ('mystr', mlarr('a string')),
                  ('mynum', mlarr(10)))

    # Dict like thing to give variables in defined order
    class C(object):
        def items(self):
            return names_vars
    stream = BytesIO()
    savemat(stream, C())
    varmats = varmats_from_mat(stream)
    assert_equal(len(varmats), 3)
    for i in range(3):
        name, var_stream = varmats[i]
        exp_name, exp_res = names_vars[i]
        assert_equal(name, exp_name)
        res = loadmat(var_stream)
        assert_array_equal(res[name], exp_res)


def test_one_by_zero():
    # Test 1x0 chars get read correctly
    func_eg = pjoin(test_data_path, 'one_by_zero_char.mat')
    fp = open(func_eg, 'rb')
    rdr = MatFile5Reader(fp)
    d = rdr.get_variables()
    fp.close()
    assert_equal(d['var'].shape, (0,))


def test_load_mat4_le():
    # We were getting byte order wrong when reading little-endian floa64 dense
    # matrices on big-endian platforms
    mat4_fname = pjoin(test_data_path, 'test_mat4_le_floats.mat')
    vars = loadmat(mat4_fname)
    assert_array_equal(vars['a'], [[0.1, 1.2]])


def test_unicode_mat4():
    # Mat4 should save unicode as latin1
    bio = BytesIO()
    var = {'second_cat': u('Schrödinger')}
    savemat(bio, var, format='4')
    var_back = loadmat(bio)
    assert_equal(var_back['second_cat'], var['second_cat'])


def test_logical_sparse():
    # Test we can read logical sparse stored in mat file as bytes.
    # See https://github.com/scipy/scipy/issues/3539.
    # In some files saved by MATLAB, the sparse data elements (Real Part
    # Subelement in MATLAB speak) are stored with apparent type double
    # (miDOUBLE) but are in fact single bytes.
    filename = pjoin(test_data_path,'logical_sparse.mat')
    # Before fix, this would crash with:
    # ValueError: indices and data should have the same size
    d = loadmat(filename, struct_as_record=True)
    log_sp = d['sp_log_5_4']
    assert_(isinstance(log_sp, SP.csc_matrix))
    assert_equal(log_sp.dtype.type, np.bool_)
    assert_array_equal(log_sp.toarray(),
                       [[True, True, True, False],
                        [False, False, True, False],
                        [False, False, True, False],
                        [False, False, False, False],
                        [False, False, False, False]])


def test_empty_sparse():
    # Can we read empty sparse matrices?
    sio = BytesIO()
    import scipy.sparse
    empty_sparse = scipy.sparse.csr_matrix([[0,0],[0,0]])
    savemat(sio, dict(x=empty_sparse))
    sio.seek(0)
    res = loadmat(sio)
    assert_array_equal(res['x'].shape, empty_sparse.shape)
    assert_array_equal(res['x'].todense(), 0)
    # Do empty sparse matrices get written with max nnz 1?
    # See https://github.com/scipy/scipy/issues/4208
    sio.seek(0)
    reader = MatFile5Reader(sio)
    reader.initialize_read()
    reader.read_file_header()
    hdr, _ = reader.read_var_header()
    assert_equal(hdr.nzmax, 1)


def test_empty_mat_error():
    # Test we get a specific warning for an empty mat file
    sio = BytesIO()
    assert_raises(MatReadError, loadmat, sio)


def test_miuint32_compromise():
    # Reader should accept miUINT32 for miINT32, but check signs
    # mat file with miUINT32 for miINT32, but OK values
    filename = pjoin(test_data_path, 'miuint32_for_miint32.mat')
    res = loadmat(filename)
    assert_equal(res['an_array'], np.arange(10)[None, :])
    # mat file with miUINT32 for miINT32, with negative value
    filename = pjoin(test_data_path, 'bad_miuint32.mat')
    with warnings.catch_warnings(record=True):  # Py3k ResourceWarning
        assert_raises(ValueError, loadmat, filename)


def test_miutf8_for_miint8_compromise():
    # Check reader accepts ascii as miUTF8 for array names
    filename = pjoin(test_data_path, 'miutf8_array_name.mat')
    res = loadmat(filename)
    assert_equal(res['array_name'], [[1]])
    # mat file with non-ascii utf8 name raises error
    filename = pjoin(test_data_path, 'bad_miutf8_array_name.mat')
    with warnings.catch_warnings(record=True):  # Py3k ResourceWarning
        assert_raises(ValueError, loadmat, filename)


def test_bad_utf8():
    # Check that reader reads bad UTF with 'replace' option
    filename = pjoin(test_data_path,'broken_utf8.mat')
    res = loadmat(filename)
    assert_equal(res['bad_string'],
                 b'\x80 am broken'.decode('utf8', 'replace'))


if __name__ == "__main__":
    run_module_suite()