File: chunk-gap-plot.py

package info (click to toggle)
vcfanno 0.3.7%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 14,112 kB
  • sloc: python: 335; sh: 259; makefile: 41
file content (70 lines) | stat: -rw-r--r-- 2,020 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import sys
import re
import numpy as np
from collections import defaultdict

from matplotlib import pyplot as plt
import seaborn as sns
# Global plot styling: white background, and the matplotlib colour cycle set
# from the Set1 palette.  set_palette() is called only for that side effect
# (it returns None), so its result is deliberately not bound to a name.
sns.set_style("white")
sns.set_palette('Set1', 8)
# Explicit list of three Set1 colours; the plotting loop indexes into it.
colors = sns.color_palette('Set1', 3)

# A single small panel.  The lone Axes is wrapped in a tuple so the rest of
# the script can address it uniformly as axes[0].
fig, axes = plt.subplots(1, figsize=(4, 2))
axes = (axes,)

# run as  python chunk-gap-plot.py  1kg.times-tails.fmt.txt exac.times-tails.txt
#
# Each input line is split on whitespace into
#   <gap> <chunk> <procs> <rest of line>
# and the rest of the line must contain "in <N> seconds".

# Chunk sizes that get a curve in the figure; everything else is skipped.
PLOTTED_CHUNKS = (1000, 10000, 100000)
# Compiled once instead of per line; raw strings avoid invalid-escape warnings.
FIELD_SEP_RE = re.compile(r"\s+")
SECONDS_RE = re.compile(r"in (.+) seconds")

for i, path in enumerate(sys.argv[1:3]):
    # The first argument must be the 1KG timings and the second the ExAC
    # timings: labels and line styles below are chosen by position.  This is
    # an explicit check rather than an assert so it survives `python -O`.
    expected = "1kg" if i == 0 else "exac"
    if expected not in path.lower():
        raise ValueError("argument %d (%r) should be the %s timings file"
                         % (i + 1, path, expected))

    # (gap, chunk) -> list of run times in seconds
    groups = defaultdict(list)
    with open(path) as handle:
        for line in handle:
            gap, chunk, procs, info = FIELD_SEP_RE.split(line, maxsplit=3)

            if int(chunk) not in PLOTTED_CHUNKS:
                continue

            found = SECONDS_RE.search(info)
            if found is None:
                raise ValueError("no 'in <N> seconds' timing in line: %r"
                                 % line)
            seconds = found.group(1)

            # Gap size 100 is excluded from the figure.  (A chunk size of
            # 100 can never reach this point because of the filter above.)
            if gap == '100':
                continue
            # Only the 4-process runs are plotted.
            if int(procs) != 4:
                continue

            groups[(int(gap), int(chunk))].append(float(seconds))

    # chunk -> [(gap, mean seconds), ...]
    bychunk = defaultdict(list)
    for (gap, chunk), times in groups.items():
        bychunk[chunk].append((gap, np.mean(times)))

    label = "ExAC" if i == 1 else "1KG"
    linestyle = "--" if label == "ExAC" else "-"

    for j, (chunk, vals) in enumerate(sorted(bychunk.items())):
        # Order the points by gap size so the line is drawn left to right.
        vals.sort()
        xs, ys = zip(*vals)
        # Second-file (ExAC) curves are labelled with the dataset name only;
        # first-file curves also carry their chunk size.
        plabel = label if i == 1 else "%d : %s" % (chunk, label)
        axes[0].plot(xs, ys, color=colors[j], ls=linestyle, label=plabel)

    if i == 0:
        axes[0].set_xlabel("Gap size")
    axes[0].set_ylabel("Time (seconds)")

# Remove the top/right spines, then position the legend relative to the
# upper-right corner of the axes' bounding box.
sns.despine()
box = axes[0].get_position()
legend = plt.legend(
    ncol=2,
    markerfirst=False,
    title="Chunk size",
    loc=(box.x1 - 0.45, box.y1 - 0.085),
)

# Shrink all axis text to 7pt and the legend entries to 5pt so everything
# fits in the small figure.
ax = plt.gca()
axis_texts = [ax.title, ax.xaxis.label, ax.yaxis.label]
axis_texts.extend(ax.get_xticklabels())
axis_texts.extend(ax.get_yticklabels())
for text in axis_texts:
    text.set_fontsize(7)
for text in legend.get_texts():
    text.set_fontsize(5)


# Write the figure to disk, then display it interactively.
plt.savefig('figure-5.pdf')
plt.show()