File: search-bench-plot.py

package info (click to toggle)
pytables 3.11.0-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 15,284 kB
  • sloc: ansic: 82,216; python: 65,566; cpp: 753; sh: 394; makefile: 106
file content (172 lines) | stat: -rw-r--r-- 4,835 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
from matplotlib import pyplot as plt

import tables as tb


def get_values(filename, complib=""):
    """Read benchmark results from a PyTables file and derive plot values.

    The computation is steered by the module-level flags ``mb_units``,
    ``insert``, ``table_size``, ``query`` and ``query_cache``.  When several
    mode flags are set, the last one in the order insert, table_size, query,
    query_cache determines the returned values.

    Parameters
    ----------
    filename : str
        Path of the file to open with ``tb.open_file``.
    complib : str, optional
        Unused; kept so that existing callers keep working.

    Returns
    -------
    tuple
        ``(nrows, values)`` where ``nrows`` is the ``nrows`` column of
        ``/small/create_best`` and ``values`` is the quantity derived for
        the selected mode.

    Raises
    ------
    ValueError
        If none of the mode flags is set, since there would be nothing to
        compute.
    """
    # Fail early with a clear message instead of hitting an unbound
    # ``values`` after the file has already been opened.
    if not (insert or table_size or query or query_cache):
        raise ValueError(
            "no plot mode selected: set one of insert, table_size, "
            "query or query_cache"
        )

    # The context manager closes the file even when a node or column is
    # missing and the read raises.
    with tb.open_file(filename) as f:
        nrows = f.root.small.create_best.cols.nrows[:]
        corrected_sizes = nrows / 10**6
        if mb_units:
            # 16 is the record size in bytes used to scale rows to bytes.
            corrected_sizes = 16 * nrows / 10**6
        if insert:
            values = corrected_sizes / f.root.small.create_best.cols.tfill[:]
        if table_size:
            values = f.root.small.create_best.cols.fsize[:] / nrows
        if query:
            values = (
                corrected_sizes
                / f.root.small.search_best.inkernel.int.cols.time1[:]
            )
        if query_cache:
            values = (
                corrected_sizes
                / f.root.small.search_best.inkernel.int.cols.time2[:]
            )

    return nrows, values


def show_plot(plots, yaxis, legends, gtitle):
    """Decorate the current pyplot figure, then save or display it.

    Reads the module-level names ``table_size`` (selects the legend
    position) and ``outfile`` (when truthy, the figure is saved there
    instead of being shown).

    Parameters
    ----------
    plots : sequence
        One entry per plotted curve; the first item of each entry is used
        as the legend handle (e.g. the list returned by ``plt.semilogx``).
    yaxis : str
        Label for the y axis.
    legends : sequence of str
        Legend labels, in the same order as ``plots``.
    gtitle : str
        Title of the plot.
    """
    plt.xlabel("Number of rows")
    plt.ylabel(yaxis)
    plt.xlim(10**3, 10**8)
    plt.title(gtitle)
    plt.grid(True)

    # Table-size plots get their legend on the right; all others on the left.
    legend_loc = "upper right" if table_size else "upper left"
    handles = [curve[0] for curve in plots]
    plt.legend(handles, legends, loc=legend_loc)

    if not outfile:
        plt.show()
        return
    plt.savefig(outfile)


if __name__ == "__main__":
    # Command-line driver: parse the options, read every benchmark file
    # given as a positional argument and plot one curve per file.
    #
    # NOTE: the option flags below are deliberately module-level names;
    # get_values() and show_plot() read them as globals.

    import sys
    import getopt

    usage = (
        """usage: %s [-o file] [-t title] [--insert] [--table-size] [--query] [--query-cache] [--MB-units] files
 -o filename for output (only .png and .jpg extensions supported)
 -t title of the plot
 --insert -- Insert time for table
 --table-size -- Size of table
 --query -- Time for querying the integer column
 --query-cache -- Time for querying the integer (cached)
 --MB-units -- Express speed in MB/s instead of MRows/s
 \n"""
        % sys.argv[0]
    )

    try:
        opts, pargs = getopt.getopt(
            sys.argv[1:],
            "o:t:",
            [
                "insert",
                "table-size",
                "query",
                "query-cache",
                "MB-units",
            ],
        )
    except getopt.GetoptError:
        # Unknown option or missing option argument: report it as a
        # command-line usage error (non-zero exit status).
        sys.stderr.write(usage)
        sys.exit(2)

    # if we pass too few parameters, abort
    if not pargs:
        sys.stderr.write(usage)
        sys.exit(2)

    # default options
    outfile = None
    insert = 0
    table_size = 0
    query = 0
    query_cache = 0
    mb_units = 0
    yaxis = "No axis name"
    tit = None
    gtitle = "Please set a title!"

    # Get the options
    for name, value in opts:
        if name == "-o":
            outfile = value
        elif name == "-t":
            tit = value
        elif name == "--insert":
            insert = 1
            yaxis = "MRows/s"
            gtitle = "Writing with small (16 bytes) record size"
        elif name == "--table-size":
            table_size = 1
            yaxis = "Bytes/row"
            gtitle = (
                "Disk space taken by a record (original record size: "
                "16 bytes)"
            )
        elif name == "--query":
            query = 1
            yaxis = "MRows/s"
            gtitle = (
                "Selecting with small (16 bytes) record size (file not "
                "in cache)"
            )
        elif name == "--query-cache":
            query_cache = 1
            yaxis = "MRows/s"
            gtitle = (
                "Selecting with small (16 bytes) record size (file in "
                "cache)"
            )
        elif name == "--MB-units":
            mb_units = 1

    # Without a plot mode there is nothing for get_values() to compute;
    # show the usage instead of failing later on the first file.
    if not (insert or table_size or query or query_cache):
        sys.stderr.write(usage)
        sys.exit(2)

    filenames = pargs

    if mb_units and yaxis == "MRows/s":
        yaxis = "MB/s"

    # An explicit -t title overrides the default title of the mode.
    if tit:
        gtitle = tit

    plots = []
    legends = []
    for filename in filenames:
        # The legend is the part of the file name between "cl-" and ".h5".
        # If either marker is missing, fall back to the start/end of the
        # name rather than slicing from a bogus index or raising.
        marker = filename.find("cl-")
        start = marker + 3 if marker >= 0 else 0
        end = filename.find(".h5")
        if end < 0:
            end = len(filename)
        plegend = filename[start:end].replace("-", " ")
        xval, yval = get_values(filename, "")
        print(f"Values for {filename} --> {xval}, {yval}")
        # plots.append(loglog(xval, yval, linewidth=5))
        plots.append(plt.semilogx(xval, yval, linewidth=4))
        legends.append(plegend)
    if 0:  # To introduce simulated data if desired...
        xval = [
            1000,
            10_000,
            100_000,
            1_000_000,
            10_000_000,
            100_000_000,
            1_000_000_000,
        ]
        #         yval = [0.003, 0.005, 0.02, 0.06, 1.2,
        #                 40, 210]
        yval = [0.0009, 0.0011, 0.0022, 0.005, 0.02, 0.2, 5.6]
        plots.append(plt.loglog(xval, yval, linewidth=5))
        legends.append("PyTables Std")
    show_plot(plots, yaxis, legends, gtitle)