File: gap-plot.py

package info (click to toggle)
vcfanno 0.3.7+ds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 14,112 kB
  • sloc: python: 335; sh: 259; makefile: 41
file content (61 lines) | stat: -rw-r--r-- 1,509 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import sys
import re
import numpy as np
from collections import defaultdict

# Summarise a timing log and plot mean run time against gap size, drawing one
# line per chunk size.
#
# Usage: gap-plot.py TIMINGS
#
# Every non-blank line of TIMINGS has four whitespace-separated parts,
# "<gap> <chunk> <procs> <info>", where <info> is free text containing
# "in <number> seconds".
#
# The code below runs unchanged on Python 2 and Python 3 (print is always
# called with a single pre-formatted string).

# Runs whose gap or chunk field is exactly this string are left out.
# NOTE(review): the reason for excluding these runs is not recorded here.
EXCLUDED_SIZE = '100'

# Only runs that used this many processes are summarised and plotted.
PLOT_CPUS = 4

# Extracts the elapsed time from the free-text part of a log line.
TIMING_RE = re.compile(r"in (.+) seconds")


def load_timings(path):
    """Read the timing log at *path*.

    Returns a dict mapping ``(gap, chunk, procs)`` (all ints) to the list of
    elapsed times in seconds (floats) recorded for that configuration, in
    file order.  Blank lines are skipped, as are runs whose gap or chunk
    field equals ``EXCLUDED_SIZE``.

    Raises ValueError, naming the offending line, when a line has fewer than
    four fields, has no "in <number> seconds" text, or has a non-numeric
    field.
    """
    timings = defaultdict(list)
    with open(path) as handle:
        for lineno, line in enumerate(handle, 1):
            fields = line.split(None, 3)
            if not fields:
                continue  # blank line
            if len(fields) != 4:
                raise ValueError("%s:%d: expected '<gap> <chunk> <procs> <info>', got %r"
                                 % (path, lineno, line))
            gap, chunk, procs, info = fields
            if gap == EXCLUDED_SIZE or chunk == EXCLUDED_SIZE:
                continue
            found = TIMING_RE.search(info)
            if found is None:
                raise ValueError("%s:%d: no 'in <number> seconds' in %r"
                                 % (path, lineno, line))
            key = (int(gap), int(chunk), int(procs))
            timings[key].append(float(found.group(1)))
    return timings


def summarize(timings, cpus=PLOT_CPUS):
    """Average the repeated runs of every configuration that used *cpus* processes.

    *timings* is the mapping returned by ``load_timings``; it is not modified.

    Returns ``(means, by_chunk, by_gap, by_cpu)``:

    * ``means``    -- ``{(gap, chunk, cpu): mean_seconds}``
    * ``by_chunk`` -- ``{chunk: [(gap, mean_seconds), ...]}``
    * ``by_gap``   -- ``{gap: [(chunk, mean_seconds), ...]}``
    * ``by_cpu``   -- ``{cpu: [(gap, mean_seconds), ...]}``

    Every point list is sorted.  Means are plain Python floats so they print
    the same way whatever the installed numpy version.
    """
    means = {}
    by_chunk = defaultdict(list)
    by_gap = defaultdict(list)
    by_cpu = defaultdict(list)
    for (gap, chunk, cpu), seconds in timings.items():
        if cpu != cpus:
            continue
        mean = float(np.mean(seconds))
        means[(gap, chunk, cpu)] = mean
        by_chunk[chunk].append((gap, mean))
        by_gap[gap].append((chunk, mean))
        by_cpu[cpu].append((gap, mean))
    for series in (by_chunk, by_gap, by_cpu):
        for points in series.values():
            points.sort()
    return means, by_chunk, by_gap, by_cpu


def plot_series(series, label_format, xlabel, n_colors=None):
    """Draw one line per key of *series*, print each series, and show the plot.

    *series* maps a key to a list of ``(x, seconds)`` points; *label_format*
    is a %-format string applied to the key to build the legend entry;
    *xlabel* labels the x axis.  *n_colors* is the size requested for the
    'Set1' palette and defaults to the number of series.
    """
    # Imported here rather than at module level so that reading and
    # summarising a log does not require the plotting libraries.
    from matplotlib import pyplot as plt
    import seaborn as sns

    if n_colors is None:
        n_colors = len(series)
    sns.set_palette('Set1', n_colors)

    for key, points in sorted(series.items()):
        points = sorted(points)
        xs, ys = zip(*points)
        plt.plot(xs, ys, label=label_format % key)
        print("%s %s" % (key, points))

    plt.xlabel(xlabel)
    plt.ylabel("time (seconds)")
    plt.legend()
    plt.show()


def print_means(means):
    """Print one "gap chunk cpu mean_seconds" line per configuration, sorted.

    Not called by ``main``: the original script never reached its equivalent
    dump either.  Kept as a debugging aid.
    """
    for (gap, chunk, cpu), mean in sorted(means.items()):
        print("%s %s %s %s" % (gap, chunk, cpu, mean))


def main(argv=None):
    """Script entry point; returns the process exit status.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the timing log to read.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("usage: %s TIMINGS\n" % argv[0])
        return 2

    timings = load_timings(argv[1])
    means, by_chunk, by_gap, by_cpu = summarize(timings)
    if not by_chunk:
        sys.stderr.write("no timings for %d processes in %s\n" % (PLOT_CPUS, argv[1]))
        return 1

    # Alternative views of the same data:
    #   plot_series(by_gap, "gap-size: %d", "chunk size", n_colors=len(timings))
    #   plot_series(by_cpu, "cpus: %d", "gap size", n_colors=len(timings))
    # The palette is sized by the total number of configurations read, as the
    # script has always done.
    plot_series(by_chunk, "chunk-size: %d", "gap size", n_colors=len(timings))
    return 0


if __name__ == "__main__":
    sys.exit(main())