File: test_train_dictionary.py

package info (click to toggle)
python-zstandard 0.23.0-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,936 kB
  • sloc: ansic: 41,411; python: 8,665; makefile: 22; sh: 14
file content (107 lines) | stat: -rw-r--r-- 3,149 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import struct
import unittest

import zstandard as zstd

from .common import (
    generate_samples,
    get_optimal_dict_size_heuristically,
    random_input_data,
)


class TestTrainDictionary(unittest.TestCase):
    def test_no_args(self):
        with self.assertRaises(TypeError):
            zstd.train_dictionary()

    def test_bad_args(self):
        with self.assertRaises(TypeError):
            zstd.train_dictionary(8192, "foo")

        with self.assertRaises(ValueError):
            zstd.train_dictionary(8192, ["foo"])

    def test_no_params(self):
        d = zstd.train_dictionary(8192, random_input_data())
        self.assertIsInstance(d.dict_id(), int)

        # The dictionary ID may be different across platforms.
        expected = b"\x37\xa4\x30\xec" + struct.pack("<I", d.dict_id())

        data = d.as_bytes()
        self.assertEqual(data[0:8], expected)

    def test_basic(self):
        d = zstd.train_dictionary(8192, generate_samples(), k=500, d=8)
        self.assertIsInstance(d.dict_id(), int)

        data = d.as_bytes()
        self.assertEqual(data[0:4], b"\x37\xa4\x30\xec")

        self.assertEqual(d.k, 500)
        self.assertEqual(d.d, 8)

    def test_set_dict_id(self):
        samples = generate_samples()
        d = zstd.train_dictionary(
            get_optimal_dict_size_heuristically(samples),
            samples,
            k=64,
            d=8,
            dict_id=42,
        )
        self.assertEqual(d.dict_id(), 42)

    def test_optimize(self):
        samples = generate_samples()
        d = zstd.train_dictionary(
            get_optimal_dict_size_heuristically(samples),
            samples,
            threads=-1,
            steps=1,
            d=6,
            notifications=2,
        )

        # This varies by platform.
        self.assertIn(d.k, (50, 2000))
        self.assertEqual(d.d, 6)


class TestCompressionDict(unittest.TestCase):
    def test_bad_mode(self):
        with self.assertRaisesRegex(ValueError, "invalid dictionary load mode"):
            zstd.ZstdCompressionDict(b"foo", dict_type=42)

    def test_bad_precompute_compress(self):
        samples = generate_samples()
        d = zstd.train_dictionary(
            get_optimal_dict_size_heuristically(samples), samples, k=64, d=8
        )

        with self.assertRaisesRegex(
            ValueError, "must specify one of level or "
        ):
            d.precompute_compress()

        with self.assertRaisesRegex(
            ValueError, "must only specify one of level or "
        ):
            d.precompute_compress(
                level=3, compression_params=zstd.ZstdCompressionParameters()
            )

    def test_precompute_compress_rawcontent(self):
        d = zstd.ZstdCompressionDict(
            b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_RAWCONTENT
        )
        d.precompute_compress(level=1)

        d = zstd.ZstdCompressionDict(
            b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_FULLDICT
        )
        with self.assertRaisesRegex(
            zstd.ZstdError, "unable to precompute dictionary"
        ):
            d.precompute_compress(level=1)