File: test_integral_codecs.py

package info (click to toggle)
pyodc 1.6.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 700 kB
  • sloc: python: 2,369; ansic: 86; makefile: 32
file content (124 lines) | stat: -rw-r--r-- 3,322 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import io

import pandas as pd
import pytest

from pyodc import codec
from pyodc.codec import select_codec
from pyodc.stream import LittleEndianStream


def _check_encode(codec, series, encode_compare):
    """Encode each value of *series* with *codec* and compare the raw output.

    Values are written one at a time, in iteration order, through a
    LittleEndianStream wrapped around an in-memory buffer. The complete
    buffer contents must equal *encode_compare*.
    """
    buffer = io.BytesIO()
    stream = LittleEndianStream(buffer)

    for value in series:
        codec.encode(stream, value)

    encoded = buffer.getvalue()
    assert encoded == encode_compare


def test_int8_range_encoding():
    """
    A span of integers that fits in a single byte selects the Int8 codec.

    The same span is tried from a positive base and from a negative one. In
    both cases the codec's ``min`` must be the smallest value, and the two
    values must encode to the bytes 0x00 and 0xff.
    """

    # Also test with negative numbers!

    for offset in (0, -100):
        s = pd.Series((1 + offset, 2**8 + offset))
        c = select_codec("column", s, None, None)

        assert isinstance(c, codec.Int8)
        assert c.min == 1 + offset

        # Check the encoded bytes for every offset, not only the last one.
        _check_encode(c, s, b"\x00\xff")


def test_int16_range_encoding_minimal():
    """
    The narrowest span of integers for which Int16 is expected.

    The two values are 2**8 apart, so the larger one is expected to encode as
    the two little-endian bytes 0x00 0x01.
    """
    expected_bytes = b"\x00\x00\x00\x01"

    # Run once from a positive base and once from a negative one.
    for shift in (0, -10000):
        lowest = 1 + shift
        highest = 2**8 + shift + 1
        values = pd.Series((lowest, highest))
        chosen = select_codec("column", values, None, None)

        assert isinstance(chosen, codec.Int16)
        assert chosen.min == lowest

        _check_encode(chosen, values, expected_bytes)


def test_int16_range_encoding_maximal():
    """
    The widest span of integers for which Int16 is expected.

    The largest value lies 2**16 - 1 above the smallest and is expected to
    encode as 0xff 0xff; an intermediate value 2**8 - 1 above the smallest is
    expected to encode as 0xff 0x00.
    """
    expected_bytes = b"\x00\x00\xff\x00\xff\xff"

    # Run once from a positive base and once from a negative one.
    for shift in (0, -10000):
        lowest = 1 + shift
        values = pd.Series((lowest, 2**8 + shift, 2**16 + shift))
        chosen = select_codec("column", values, None, None)

        assert isinstance(chosen, codec.Int16)
        assert chosen.min == lowest

        _check_encode(chosen, values, expected_bytes)


def test_int32_range_encoding():
    """
    n.b. the Int32 codec is limited. It does _not_ include an offset value
    --> It only encodes the legitimate values of a SIGNED 32bit integer
    --> 64bit integers are still to do (but break some fortran compatibility,
        as not all 64bit integers can be represented as doubles).
    --> Can include missing values
    """
    lowest = -(2**31)
    values = pd.Series((lowest, None, 2**31 - 2))
    chosen = select_codec("column", values, None, None)

    assert isinstance(chosen, codec.Int32)
    assert chosen.min == lowest

    # Expected little-endian output: 0x80000000 for the minimum, 0x7fffffff
    # for the None entry, and 0x7ffffffe for 2**31 - 2.
    expected_bytes = b"\x00\x00\x00\x80\xff\xff\xff\x7f\xfe\xff\xff\x7f"
    _check_encode(chosen, values, expected_bytes)


def test_wider_range_unsupported():
    """
    Codec selection must raise NotImplementedError for a series running from
    -(2**31) up to 2**31 - 1.
    """
    lower_bound = -(2**31)
    upper_bound = 2**31 - 1
    values = pd.Series((lower_bound, upper_bound))

    with pytest.raises(NotImplementedError):
        select_codec("column", values, None, None)


def test_int8_missing_range_encoding():
    """
    A single-byte span that contains a missing value selects Int8Missing.

    The same span is tried from a positive base and from a negative one. In
    both cases the codec's ``min`` must be the smallest value, the None entry
    is expected to encode as 0xff, and the largest value as 0xfe.
    """

    # Also test with negative numbers!

    for offset in (0, -100):
        s = pd.Series((1 + offset, None, 2**8 + offset - 1))
        c = select_codec("column", s, None, None)

        assert isinstance(c, codec.Int8Missing)
        assert c.min == 1 + offset

        # Check the encoded bytes for every offset, not only the last one.
        _check_encode(c, s, b"\x00\xff\xfe")


def test_int16_missing_range_encoding_minimal():
    """
    The narrowest span with a missing value for which Int16Missing is expected.

    The same span is tried from a positive base and from a negative one. In
    both cases the codec's ``min`` must be the smallest value, the None entry
    is expected to encode as 0xff 0xff, and the largest value (2**8 - 1 above
    the minimum) as 0xff 0x00.
    """

    # Also test with negative numbers!

    for offset in (0, -100):
        s = pd.Series((1 + offset, None, 2**8 + offset))
        c = select_codec("column", s, None, None)

        assert isinstance(c, codec.Int16Missing)
        assert c.min == 1 + offset

        # Check the encoded bytes for every offset, not only the last one.
        _check_encode(c, s, b"\x00\x00\xff\xff\xff\x00")


def test_int16_missing_range_encoding_maximal():
    """
    The widest span with a missing value for which Int16Missing is expected.

    The same span is tried from a positive base and from a negative one. In
    both cases the codec's ``min`` must be the smallest value, the None entry
    is expected to encode as 0xff 0xff, and the largest value (2**16 - 2 above
    the minimum) as 0xfe 0xff.
    """

    # Also test with negative numbers!

    for offset in (0, -100):
        s = pd.Series((1 + offset, None, 2**16 + offset - 1))
        c = select_codec("column", s, None, None)

        assert isinstance(c, codec.Int16Missing)
        assert c.min == 1 + offset

        # Check the encoded bytes for every offset, not only the last one.
        _check_encode(c, s, b"\x00\x00\xff\xff\xfe\xff")