1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
|
#!/usr/bin/env python3
#
import sys
import json
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
def semver2int(semver):
    """ Convert a dotted version string to an integer usable as a sort key.

        semver: version string such as '0.9.0.1' or '1.0.0'. The special
                name 'trunk' is treated as '0.10.0.0'.

        Returns an int that orders versions numerically, component by
        component starting from the left (most significant) one.

        Only the first four components are considered; shorter versions
        are zero-padded on the right so that '1.0.0' and '1.0.0.0' compare
        equal. A component that is not a plain integer counts as 0, so a
        placeholder such as 'unknown' yields 0 and sorts first.
    """
    if semver == 'trunk':
        semver = '0.10.0.0'

    slots = 4     # number of components compared
    base = 1000   # weight step; assumes every component is below 1000

    parts = []
    for v in semver.split('.')[:slots]:
        try:
            parts.append(int(v))
        except ValueError:
            # Non-numeric component: keep sorting usable instead of
            # aborting the whole run on one odd version string.
            parts.append(0)

    # Left-align: pad missing trailing components with zeros so that
    # versions of different lengths are weighted consistently.
    parts.extend([0] * (slots - len(parts)))

    # Positional fold: each step shifts the accumulated value one slot
    # to the left, giving the first component the highest weight.
    vi = 0
    for p in parts:
        vi = vi * base + p
    return vi
def get_perf_data(perfname, stats):
    """ Aggregate one performance counter per broker version.

        perfname: name of the counter to extract, used as a key into each
                  sample's counter mapping.
        stats: iterable of (version, counters) pairs; counters[perfname]
               is collected under str(version).

        Returns [labels, x, y, errs] as numpy arrays:
        labels: broker versions, ordered with semver2int()
        x: positions 0..len(labels)-1, one per label
        y: average of the perfname counter for each version
        errs: distance from that average up to the largest sample
    """
    samples = defaultdict(list)

    # Group the raw counter values by broker version
    for entry in stats:
        samples[str(entry[0])].append(entry[1][perfname])
    print('%s is %s' % (perfname, samples))

    # Everything below is derived in this version order
    ordered = sorted(samples, key=semver2int)

    # Mean per version, then the gap between the mean and the best sample
    means = [sum(samples[v]) / float(len(samples[v])) for v in ordered]
    spread = [max(samples[v]) - m for v, m in zip(ordered, means)]

    labels = np.array(ordered)
    positions = np.array(range(0, len(labels)))
    return [labels, positions, np.array(means), np.array(spread)]
def plot(description, name, stats, perfname, outfile=None):
    """ Plot one performance counter against Kafka version.

        description: free-form text used as the first part of the title
        name: second part of the title (the caller passes 'producer' or
              'consumer')
        stats: samples forwarded to get_perf_data()
        perfname: counter to plot; also used as the y-axis label
        outfile: path to save the image to, or None to display the plot
                 with plt.show() instead
    """
    labels, x, y, errs = get_perf_data(perfname, stats)

    # pyplot draws into the "current" figure. Use a fresh one per call so
    # that successive plots do not pile up on the same axes.
    fig = plt.figure()
    try:
        plt.title('%s: %s %s' % (description, name, perfname))
        plt.xlabel('Kafka version')
        plt.ylabel(perfname)
        plt.errorbar(x, y, yerr=errs, alpha=0.5)
        plt.xticks(x, labels, rotation='vertical')
        plt.margins(0.2)
        plt.subplots_adjust(bottom=0.2)
        if outfile is None:
            plt.show()
        else:
            plt.savefig(outfile, bbox_inches='tight')
    finally:
        # Release the figure even if drawing or saving failed
        plt.close(fig)
if __name__ == '__main__':
    # Usage: <script> <outfile-prefix> [report.json ...]
    # One PNG per (client type, counter) pair is written, named
    # <outfile-prefix>_<type>_<counter>.png
    if len(sys.argv) < 2:
        sys.stderr.write('Usage: %s <outfile-prefix> [report.json ...]\n' %
                         sys.argv[0])
        sys.exit(1)

    outfile = sys.argv[1]

    reports = []
    for rf in sys.argv[2:]:
        with open(rf) as f:
            reports.append(json.load(f))

    stats = defaultdict(list)

    # Extract performance test data
    for rep in reports:
        # Each level defaults to an empty dict so that a report lacking
        # the test (or its 'report' entry) is skipped rather than
        # failing on a missing .get().
        perfs = rep.get('tests', {}).get(
            '0038_performance', {}).get('report', None)
        if perfs is None:
            continue

        for perf in perfs:
            for n in ['producer', 'consumer']:
                o = perf.get(n, None)
                if o is None:
                    print('no %s in %s' % (n, perf))
                    continue

                # Pair every sample with the broker version it ran against
                stats[n].append((rep.get('broker_version', 'unknown'), o))

    for t in ['producer', 'consumer']:
        for perfname in ['mb_per_sec', 'records_per_sec']:
            plot('librdkafka 0038_performance test: %s (%d samples)' %
                 (outfile, len(reports)),
                 t, stats[t], perfname, outfile='%s_%s_%s.png' % (
                     outfile, t, perfname))
|