1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
|
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
from collections import defaultdict
from glob import glob
from numpy import array
def plot_y_per_x_per_plot(data, xlabel, ylabel, file, log=False):
    """Draw a grouped bar chart with one bar series per entry of *data* and save it.

    :param data: mapping ``label -> {x: y}``; every label becomes one bar
        series, drawn in sorted label order.
    :param xlabel: caption of the x axis.
    :param ylabel: caption of the y axis.
    :param file: used both as the matplotlib figure identifier and as the
        path the figure is saved to.
    :param log: requests a logarithmic y axis.
    """
    plt.figure(file)

    series_count = len(data)
    # One extra slot is reserved so that neighbouring x groups do not touch.
    width = 1 / (series_count + 1)

    for position, label in enumerate(sorted(data)):
        points = data[label]
        # Shift each series sideways so the bars of one x value sit side by side.
        shift = width * (position + 1 - series_count / 2)
        x_positions = array(list(points.keys())) + shift
        heights = array(list(points.values()))
        plt.bar(x_positions, heights, width, label=label)

    plt.legend()
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # NOTE(review): the y axis is switched to a log scale unconditionally, so
    # the `log` flag currently makes no visible difference. Kept as is because
    # existing callers rely on the logarithmic output.
    plt.yscale('log')
    if log:
        plt.yscale('log')

    plt.savefig(file)
def plot_usecase(name: str):
    """Plot the execution times of one use case and sanity-check its result counts.

    Every file matching ``bsbm.<name>.*.xml`` in the current directory is
    parsed. The dot-separated components of the file name between the use
    case name and the extension identify the run; the third component from
    the end is parsed as an integer and reported as the size.

    For each ``<query>`` element found under ``<queries>``:

    * a strictly positive ``<aqet>`` value is recorded and later plotted as
      the execution time (in seconds) of that query for the run;
    * the ``<avgresults>`` value is recorded per query and size so that the
      different runs can be compared with each other.

    The chart is written to ``bsbm.<name>.svg``. A message is printed for
    every query/size pair in which some run deviates from the mean
    ``avgresults`` by 1 or more.

    :param name: use case name as it appears in the result file names.
    """
    aqet = defaultdict(dict)
    avgresults_by_query = defaultdict(lambda: defaultdict(dict))

    # glob() returns files in arbitrary order; sorting keeps the diagnostics
    # printed below stable from one invocation to the next.
    for file in sorted(glob('bsbm.{}.*.xml'.format(name))):
        parts = file.split('.')
        run = '.'.join(parts[2:-1])
        for query in ET.parse(file).getroot().find('queries').findall('query'):
            query_id = int(query.attrib['nr'])

            # Look the children up directly instead of walking every
            # descendant of the query and searching for them again.
            aqet_element = query.find('aqet')
            if aqet_element is not None:
                val = float(aqet_element.text)
                # Non-positive timings are left out of the plot.
                if val > 0:
                    aqet[run][query_id] = val

            avgresults_element = query.find('avgresults')
            if avgresults_element is not None:
                avgresults_by_query[query_id][int(parts[-3])][run] = float(avgresults_element.text)

    plot_y_per_x_per_plot(aqet, 'query id', 'execution time (s)', 'bsbm.{}.svg'.format(name))

    # we check if avgresults seems consistent
    for query, t in avgresults_by_query.items():
        for size, value_by_run in t.items():
            avg = sum(value_by_run.values()) / len(value_by_run)
            if not all(abs(v - avg) < 1 for v in value_by_run.values()):
                print(
                    f'Strange value for average results for usecase {name} of size {size} and query {query}: {value_by_run}')
# Render one chart per benchmark use case, then display all figures.
for usecase in ('explore', 'exploreAndUpdate', 'businessIntelligence'):
    plot_usecase(usecase)
plt.show()
|