File: test_tables.py

package info (click to toggle)
python-whoosh 2.7.4%2Bgit6-g9134ad92-10
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,804 kB
  • sloc: python: 38,552; makefile: 118
file content (215 lines) | stat: -rw-r--r-- 6,110 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# encoding: utf-8

from __future__ import with_statement
import random

from whoosh.compat import b, xrange, iteritems
from whoosh.filedb.filestore import RamStorage
from whoosh.filedb.filetables import HashReader, HashWriter
from whoosh.filedb.filetables import OrderedHashWriter, OrderedHashReader
from whoosh.util.testing import TempStorage


def test_hash_single():
    st = RamStorage()
    hw = HashWriter(st.create_file("test.hsh"))
    hw.add(b("alfa"), b("bravo"))
    hw.close()

    hr = HashReader.open(st, "test.hsh")
    assert hr.get(b("alfa")) == b("bravo")
    assert hr.get(b("foo")) is None


def test_hash():
    with TempStorage("hash") as st:
        hwf = st.create_file("test.hsh")
        hw = HashWriter(hwf)
        hw.add(b("foo"), b("bar"))
        hw.add(b("glonk"), b("baz"))
        hw.close()

        hr = HashReader.open(st, "test.hsh")
        assert hr.get(b("foo")) == b("bar")
        assert hr.get(b("baz")) is None
        hr.close()


def test_hash_extras():
    st = RamStorage()
    hw = HashWriter(st.create_file("test.hsh"))
    hw.extras["test"] = 100
    hw.add(b("foo"), b("bar"))
    hw.add(b("glonk"), b("baz"))
    hw.close()

    hr = HashReader.open(st, "test.hsh")
    assert hr.extras["test"] == 100
    assert hr.get(b("foo")) == b("bar")
    assert hr.get(b("baz")) is None
    hr.close()


def test_hash_contents():
    samp = [('alfa', 'bravo'), ('charlie', 'delta'), ('echo', 'foxtrot'),
            ('golf', 'hotel'), ('india', 'juliet'), ('kilo', 'lima'),
            ('mike', 'november'), ('oskar', 'papa'), ('quebec', 'romeo'),
            ('sierra', 'tango'), ('ultra', 'victor'), ('whiskey', 'xray'),
            ]
    # Convert to bytes
    samp = set((b(k), b(v)) for k, v in samp)

    with TempStorage("hashcontents") as st:
        hw = HashWriter(st.create_file("test.hsh"))
        hw.add_all(samp)
        hw.close()

        hr = HashReader.open(st, "test.hsh")

        probes = list(samp)
        random.shuffle(probes)
        for key, value in probes:
            assert hr[key] == value

        assert set(hr.keys()) == set([k for k, v in samp])
        assert set(hr.values()) == set([v for k, v in samp])
        assert set(hr.items()) == samp

        hr.close()


def test_random_hash():
    from string import ascii_letters as domain

    times = 1000
    minlen = 1
    maxlen = len(domain)

    def randstring():
        s = "".join(random.sample(domain, random.randint(minlen, maxlen)))
        return b(s)

    with TempStorage("randomhash") as st:
        samp = dict((randstring(), randstring()) for _ in xrange(times))

        hw = HashWriter(st.create_file("test.hsh"))
        for k, v in iteritems(samp):
            hw.add(k, v)
        hw.close()

        keys = list(samp.keys())
        random.shuffle(keys)
        hr = HashReader.open(st, "test.hsh")
        for k in keys:
            assert hr[k] == samp[k]
        hr.close()


def test_random_access():
    times = 1000
    with TempStorage("orderedhash") as st:
        hw = HashWriter(st.create_file("test.hsh"))
        hw.add_all((b("%08x" % x), b(str(x))) for x in xrange(times))
        hw.close()

        keys = list(range(times))
        random.shuffle(keys)
        hr = HashReader.open(st, "test.hsh")
        for x in keys:
            assert hr[b("%08x" % x)] == b(str(x))
        hr.close()


def test_ordered_closest():
    keys = ['alfa', 'bravo', 'charlie', 'delta', 'echo', 'foxtrot', 'golf',
            'hotel', 'india', 'juliet', 'kilo', 'lima', 'mike', 'november']
    # Make into bytes for Python 3
    keys = [b(k) for k in keys]
    values = [str(len(k)).encode("ascii") for k in keys]

    with TempStorage("orderedclosest") as st:
        hw = OrderedHashWriter(st.create_file("test.hsh"))
        hw.add_all(zip(keys, values))
        hw.close()

        hr = OrderedHashReader.open(st, "test.hsh")
        ck = hr.closest_key
        assert ck(b('')) == b('alfa')
        assert ck(b(' ')) == b('alfa')
        assert ck(b('alfa')) == b('alfa')
        assert ck(b('bravot')) == b('charlie')
        assert ck(b('charlie')) == b('charlie')
        assert ck(b('kiloton')) == b('lima')
        assert ck(b('oskar')) is None
        assert list(hr.keys()) == keys
        assert list(hr.values()) == values
        assert list(hr.keys_from(b('f'))) == keys[5:]
        hr.close()


def test_extras():
    st = RamStorage()
    hw = HashWriter(st.create_file("test"))
    hw.extras["test"] = 100
    hw.extras["blah"] = "foo"
    hw.close()

    hr = HashReader(st.open_file("test"), st.file_length("test"))
    assert hr.extras["test"] == 100
    assert hr.extras["blah"] == "foo"
    hr.close()

    hw = OrderedHashWriter(st.create_file("test"))
    hw.extras["test"] = 100
    hw.extras["blah"] = "foo"
    hw.close()

    hr = HashReader(st.open_file("test"), st.file_length("test"))
    assert hr.extras["test"] == 100
    assert hr.extras["blah"] == "foo"
    hr.close()

    hr = OrderedHashReader(st.open_file("test"), st.file_length("test"))
    assert hr.extras["test"] == 100
    assert hr.extras["blah"] == "foo"
    hr.close()


def test_checksum_file():
    from whoosh.filedb.structfile import ChecksumFile
    from zlib import crc32

    def wr(f):
        f.write(b("Testing"))
        f.write_int(-100)
        f.write_varint(10395)
        f.write_string(b("Hello"))
        f.write_ushort(32959)

    st = RamStorage()
    # Write a file normally
    f = st.create_file("control")
    wr(f)
    f.close()
    # Checksum the contents
    f = st.open_file("control")
    target = crc32(f.read()) & 0xffffffff
    f.close()

    # Write a file with checksumming
    f = st.create_file("test")
    cf = ChecksumFile(f)
    wr(cf)
    assert cf.checksum() == target
    f.close()

    # Read the file with checksumming
    f = st.open_file("test")
    cf = ChecksumFile(f)
    assert cf.read(7) == b("Testing")
    assert cf.read_int() == -100
    assert cf.read_varint() == 10395
    assert cf.read_string() == b("Hello")
    assert cf.read_ushort() == 32959
    assert cf.checksum() == target
    cf.close()