File: pyarrow-read-jaggedN-parquet.py

package info (click to toggle)
python-awkward 2.6.5-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 23,088 kB
  • sloc: python: 148,689; cpp: 33,562; sh: 432; makefile: 21; javascript: 8
file content (45 lines) | stat: -rw-r--r-- 1,125 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import sys
import time
import subprocess

import awkward as ak

# Benchmark: time how long it takes to read every partition of a lazily
# opened Parquet file through awkward's ``ak.from_parquet``.
#
# Command line:
#   argv[1]  compression label used in the data file's name
#   argv[2]  integer N used in the "-jagged{N}" part of the file name
#   argv[3]  the literal string "split" selects the "-split" variant of the
#            file; any other value selects the non-split variant
#
# The measurement is repeated REPEATS times and each elapsed time is printed
# on its own line, so run-to-run variation is visible.

REPEATS = 3

if len(sys.argv) < 4:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit(
        f"usage: {sys.argv[0]} COMPRESS N LAYOUT"
        "   (LAYOUT == 'split' selects the -split file)"
    )

compress = sys.argv[1]
N = int(sys.argv[2])
is_split = sys.argv[3] == "split"

s = "-split" if is_split else ""
filename = f"/home/jpivarski/storage/data/chep-2021-jagged-jagged-jagged/{compress}{s}-jagged{N}.parquet"

# Run vmtouch on the file before timing: first with -t (output discarded),
# then without flags to report its "Pages" line.  The commands are passed as
# argument lists (no shell), so a file name derived from argv can never be
# interpreted as shell syntax; the former `| fgrep Pages` pipe is replaced by
# filtering the captured output here.
try:
    subprocess.run(
        ["vmtouch", "-t", filename], stdout=subprocess.DEVNULL, check=False
    )
    report = subprocess.run(
        ["vmtouch", filename], stdout=subprocess.PIPE, check=False
    )
except OSError as err:
    # vmtouch is a best-effort helper: with the old shell invocation a
    # missing binary only produced an error message, so keep going here too.
    print(f"warning: could not run vmtouch: {err}", file=sys.stderr)
else:
    for line in report.stdout.decode(errors="replace").splitlines():
        if "Pages" in line:
            print(line)

for _ in range(REPEATS):
    # Re-open the file on every repetition so each timing starts from a
    # fresh lazy array, exactly as the original three copies did.
    array = ak.from_parquet(filename, lazy=True)

    # perf_counter() is monotonic and high-resolution, unlike time.time(),
    # which can jump if the system clock is adjusted mid-measurement.
    begintime = time.perf_counter()
    for partition in array.layout.partitions:
        # Only the attribute access matters; presumably it forces the lazy
        # partition to be read -- confirm against the awkward version in use.
        tmp = partition.array
    endtime = time.perf_counter()

    print(f"pyarrow {compress}{s}-jagged{N}", endtime - begintime, "seconds")