File: postgres-search-bench.py
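"""Benchmark the insert, index and range-query speed of PostgreSQL on a simple
two-column table ints(i integer, j double precision).  Rows are generated the
same way as in the PyTables search benchmarks so that the results are comparable.

Example invocation (a sketch; pass an empty positional argument to fall back
on the module-level DSN below, or give a different connection target there):

    python postgres-search-bench.py -v -c -i -q -n 1000 -R 0,10 ''

-c creates and fills the table, -i indexes column j, -q runs the range
queries, and -n gives the table size in kilorows.
"""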

import random
from time import perf_counter as clock

import numpy as np

DSN = "dbname=test port = 5435"

# in order to always generate the same random sequence
random.seed(19)


def flatten(line):
    """Flatten a list of one-element tuples into a plain list."""
    return [x[0] for x in line]


def fill_arrays(start, stop):
    col_i = np.arange(start, stop, dtype=np.int32)
    if userandom:
        col_j = np.random.uniform(0, nrows, size=[stop - start])
    else:
        col_j = np.array(col_i, dtype=np.float64)
    return col_i, col_j


# Generator to ensure PyTables benchmark compatibility
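# int_generator() yields (i, j) pairs one by one, but fills the underlying
# NumPy arrays in blocks of 100,000 rows (via fill_arrays) so that most of the
# work is done in bulk and the per-row Python overhead stays small.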


def int_generator(nrows):
    step = 1000 * 100
    j = 0
    for i in range(nrows):
        if i >= step * j:
            stop = (j + 1) * step
            if stop > nrows:  # Seems unnecessary
                stop = nrows
            col_i, col_j = fill_arrays(i, stop)
            j += 1
            k = 0
        yield (col_i[k], col_j[k])
        k += 1


def int_generator_slow(nrows):
    for i in range(nrows):
        if userandom:
            yield (i, float(random.randint(0, nrows)))
        else:
            yield (i, float(i))


class Stream32:

    "Object simulating a file for reading"

    def __init__(self):
        self.n = None
        self.read_it = self.read_iter()

    # Does not work! It has to be converted into a regular (non-generator) method!
    def readline(self, n=None):
        for tup in int_generator(nrows):
            sout = "%s\t%s\n" % tup
            if n is not None and len(sout) > n:
                for i in range(0, len(sout), n):
                    yield sout[i : i + n]
            else:
                yield sout

    def read_iter(self):
        sout = ""
        n = self.n
        for tup in int_generator(nrows):
            sout += "%s\t%s\n" % tup
            if n is not None and len(sout) > n:
                for i in range(n, len(sout), n):
                    rout = sout[:n]
                    sout = sout[n:]
                    yield rout
        yield sout

    def read(self, n=None):
        self.n = n
        try:
            str_ = next(self.read_it)
        except StopIteration:
            str_ = ""
        return str_
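# Stream32 is consumed by cur.copy_from(Stream32(), "ints") in create_db()
# below: the COPY machinery pulls the data in chunks through read(), which
# hands back pieces of the tab-separated rows produced by int_generator().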


def open_db(filename, remove=0):
    if not filename:
        con = sqlite.connect(DSN)
    else:
        con = sqlite.connect(filename)
    cur = con.cursor()
    return con, cur


def create_db(filename, nrows):
    con, cur = open_db(filename, remove=1)
    try:
        cur.execute("create table ints(i integer, j double precision)")
    except Exception:
        con.rollback()
        cur.execute("DROP TABLE ints")
        cur.execute("create table ints(i integer, j double precision)")
    con.commit()
    con.set_isolation_level(2)
    t1 = clock()
    st = Stream32()
    cur.copy_from(st, "ints")
    # In case of postgres, the speeds of generator and loop are similar
    # cur.executemany("insert into ints values (%s,%s)", int_generator(nrows))
    #     for i in xrange(nrows):
    #         cur.execute("insert into ints values (%s,%s)", (i, float(i)))
    con.commit()
    ctime = clock() - t1
    if verbose:
        print(f"insert time: {ctime:.5f}")
        print(f"Krows/s: {nrows / 1000 / ctime:.5f}")
    close_db(con, cur)


def index_db(filename):
    con, cur = open_db(filename)
    t1 = clock()
    cur.execute("create index ij on ints(j)")
    con.commit()
    itime = clock() - t1
    if verbose:
        print(f"index time: {itime:.5f}")
        print(f"Krows/s: {nrows / itime:.5f}")
    # Close the DB
    close_db(con, cur)


def query_db(filename, rng):
    con, cur = open_db(filename)
    t1 = clock()
    ntimes = 10
    for i in range(ntimes):
        # The between clause does not seem to take advantage of indexes:
        # cur.execute("select j from ints where j between %s and %s" %
        # An alternative is to search on the integer column instead:
        # cur.execute("select i from ints where i >= %s and i <= %s" %
        cur.execute(
            "select i from ints where j >= %s and j <= %s"
            % (rng[0] + i, rng[1] + i)
        )
        results = cur.fetchall()
    con.commit()
    qtime = (clock() - t1) / ntimes
    if verbose:
        print(f"query time: {qtime:.5f}")
        print(f"Mrows/s: {nrows / 1000 / qtime:.5f}")
        results = sorted(flatten(results))
        print(results)
    close_db(con, cur)


def close_db(con, cur):
    cur.close()
    con.close()


if __name__ == "__main__":
    import sys
    import getopt

    try:
        import psyco

        psyco_imported = 1
    except Exception:
        psyco_imported = 0

    usage = (
        """usage: %s [-v] [-p] [-m] [-i] [-q] [-c] [-R range] [-n nrows] file
            -v verbose
            -p use "psyco" if available
            -m use random values to fill the table
            -q do query
            -c create the database
            -i index the table
            -2 use sqlite2 (default is sqlite3)
            -R select a range in a field in the form "start,stop" (def "0,10")
            -n sets the number of rows (in krows) in each table
            \n"""
        % sys.argv[0]
    )

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], "vpmiqc2R:n:")
    except Exception:
        sys.stderr.write(usage)
        sys.exit(0)

    # default options
    verbose = 0
    usepsyco = 0
    userandom = 0
    docreate = 0
    createindex = 0
    doquery = 0
    sqlite_version = "3"
    rng = [0, 10]
    nrows = 1

    # Get the options
    for option in opts:
        if option[0] == "-v":
            verbose = 1
        elif option[0] == "-p":
            usepsyco = 1
        elif option[0] == "-m":
            userandom = 1
        elif option[0] == "-i":
            createindex = 1
        elif option[0] == "-q":
            doquery = 1
        elif option[0] == "-c":
            docreate = 1
        elif option[0] == "-2":
            sqlite_version = "2"
        elif option[0] == "-R":
            rng = [int(i) for i in option[1].split(",")]
        elif option[0] == "-n":
            nrows = int(option[1])

    # Catch the database file (or DSN) passed as the last argument
    filename = pargs[0]

    #     if sqlite_version == "2":
    #         import sqlite
    #     else:
    #         from pysqlite2 import dbapi2 as sqlite
    import psycopg2 as sqlite

    if verbose:
        # print "pysqlite version:", sqlite.version
        if userandom:
            print("using random values")

    if docreate:
        if verbose:
            print("writing %s krows" % nrows)
        if psyco_imported and usepsyco:
            psyco.bind(create_db)
        nrows *= 1000
        create_db(filename, nrows)

    if createindex:
        index_db(filename)

    if doquery:
        query_db(filename, rng)