1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482
|
#!/usr/bin/python
# This is a re-writing of post_proc.sh into Python. Feel free to
# provide feedback on how to make it better - either better
# post-processing or better Python. Keep in mind it is only the
# second Python script I have ever written :) raj 2012-06-25
# This script will run much faster than post-proc.sh does. Some of
# that may be Python versus bash+awk+grep+rrdtool. Much of that is
# undoubtedly from better alogorithms - not going through the data
# umpteen times. For example, for a test which had up to 8 netperfs
# running and so 8 files to be post-processed:
#
#raj@tardy:~/netperf2_trunk/doc/examples$ time ./post_proc.py netperf_tps.log
#Prefix is netperf_tps
#Average of peak interval is 581554.430 Trans/s from 1340326282 to 1340326404
#Minimum of peak interval is 464912.610 Trans/s from 1340326282 to 1340326404
#Maximum of peak interval is 594025.670 Trans/s from 1340326282 to 1340326404
#
#real 0m0.450s
#user 0m0.430s
#sys 0m0.010s
#raj@tardy:~/netperf2_trunk/doc/examples$ time ./post_proc.sh netperf_tps.log
#Prefix is netperf_tps
#Performing overall summary computations
#Average of peak interval is 581460 Trans/s from 1340326283 to 1340326404
#Minimum of peak interval is 464913 Trans/s from 1340326283 to 1340326404
#Maximum of peak interval is 594026 Trans/s from 1340326283 to 1340326404
#
#real 0m16.873s
#user 0m0.500s
#sys 0m0.690s
import os
import sys
import glob
import math
import rrdtool
import argparse
def find_vrules(source):
vrules = []
interval_times = []
interval_start = 0
interval_end = 0
netperf_count=0
start_time=0.0
end_time=0.0
RED="FF0000"
BLACK="000000"
resumes=False
for line in source:
if "Starting netperfs on" in line:
netperf_count += 1
elif "Pausing" in line:
fields = line.split()
plural=''
if netperf_count > 1:
plural = 's'
vrule = 'VRULE:%d#%s:%d netperf%s running' % (int(float(fields[5])),RED,int(fields[7]),plural)
vrules.append(vrule)
interval_start = int(float(fields[5]))
elif "Resuming" in line:
fields = line.split()
if resumes:
resume_text=''
else:
resume_text='Resuming ramp'
vrule = "VRULE:%d#%s:%s" % (int(float(fields[2])),BLACK,resume_text)
vrules.append(vrule)
interval_end = int(float(fields[2]))
interval_times.append((interval_start,interval_end))
resumes=True
elif "Starting netperfs at" in line:
start_time = line.split()[3]
elif "Netperfs started" in line:
fields = line.split()
vrule = 'VRULE:%d#%s:All %d netperfs running' % (int(float(fields[3])),RED,netperf_count)
vrules.append(vrule)
interval_start = int(float(fields[3]))
elif "Netperfs stopping" in line:
fields = line.split()
vrule = 'VRULE:%d#%s:Rampdown started' % (int(float(fields[2]))-1,BLACK)
vrules.append(vrule)
interval_end = int(float(fields[2]))-1
interval_times.append((interval_start,interval_end))
elif "Netperfs stopped" in line:
end_time = line.split()[2]
return vrules,float(start_time),float(end_time),interval_times
def open_rrd(basename,start_time,end_time,max_interval):
# print "Opening %s.rrd with start time %d and end time %d" % (basename,int(start_time),int(end_time))
data_sources = [ 'DS:mbps:GAUGE:%d:U:U' % max_interval ]
rra = [ 'RRA:AVERAGE:0.5:1:%d' % ((int(end_time) - int(start_time)) + 1) ]
rrdtool.create(basename + ".rrd",
'--step', '1',
'--start', str(int(start_time)),
data_sources,
rra )
return basename + ".rrd"
def update_heartbeat(basename,heartbeat):
# print "Updating heartbeat with %d" % heartbeat
rrdtool.tune(basename + ".rrd",
'--heartbeat', 'mbps:%d' % heartbeat)
def update_rrd(basename,value,timestamp):
rrdtool.update(basename + '.rrd',
'%.3f:%f' % (timestamp, value))
def add_to_ksink(basename,start_time,end_time,ksink):
((first, last, step),name,results) = rrdtool.fetch(basename + ".rrd",
'AVERAGE',
'--start', str(int(start_time)),
'--end', str(int(end_time)))
# print "First %d last %d step %d results %d" % (first, last, step, len(results))
for key,result in enumerate(results,first):
if result[0] and key in ksink:
ksink[key] += float(result[0])
else:
if result[0]:
print "Key %d not in ksink" % key
def process_result(basename, raw_results, end_time, ksink):
first_result = True
have_result = False
had_results = False
interim_result=0.0
interim_units="Trans/s"
interim_interval=1.0
interim_end=0.0
max_interval=1
for raw_result in raw_results:
# print "Checking result %s" % raw_result
if "Interim result:" in raw_result:
# human format
fields = raw_result.split()
interim_result=float(fields[2])
interim_units=fields[3]
interim_interval=float(fields[5])
interim_end=float(fields[9])
have_result=True
elif "NETPERF_INTERIM_RESULT" in raw_result:
# keyval first line
interim_result=float(raw_result.split('=')[1])
have_result=False
elif "NETPERF_UNITS" in raw_result:
# keyval second line
interim_units=raw_result.split('=')[1]
have_result=False
elif "NETPERF_INTERVAL" in raw_result:
# keyval keyval third line
interim_interval=float(raw_result.split('=')[1])
have_result=False
elif "NETPERF_ENDING" in raw_result:
# keyval keyval fourth line
interim_end=float(raw_result.split('=')[1])
have_result=True
else:
# csv, but we are interested only in those lines with four
# fields, three commas. if someone happens to ask for
# four values from the omni-output selector this may not
# work so well but we can deal with that when it is known
# to be a problem.
fields = raw_result.split(',')
if len(fields) == 4:
interim_result = float(fields[0])
interim_units = fields[1]
interim_interval = float(fields[2])
interim_end = float(fields[3])
have_result = True
else:
have_result = False
if first_result and have_result:
# we could use the overal start time, but using the first
# timestamp for this instance may save us some space in
# the rrdfile. we do though want to subtract the
# interim_interval from that timestamp to give us some
# wriggle-room - particularly if the interval happens to
# end precisely on a step boundary...
# print "First entry for %s is %f at time %f" % (basename, interim_result,interim_end)
open_rrd(basename,
interim_end-interim_interval,
end_time,
max_interval)
first_timestamp = interim_end
first_result = False
if int(math.ceil(interim_interval)) > max_interval:
max_interval = int(math.ceil(interim_interval))
update_heartbeat(basename,max_interval)
# perhaps one of these days, once we know that the rrdtool
# bits can handle it, we will build a big list of results and
# feed them en mass. until then we will dribble them one at a
# time
if have_result:
#print "updating rrd with %s at %s" % (interim_result, interim_end)
try:
update_rrd(basename,interim_result,interim_end)
except Exception as e:
print "Update to %s with %s at %s failed with %s" % (basename,interim_result,interim_end,e)
have_result = False
had_results = True
if had_results:
last_timestamp = interim_end
# print "First timestamp for this instance %f last %f" % (first_timestamp,last_timestamp)
return first_timestamp, last_timestamp
else:
return 0, 0
def process_result_files(prefix,start_time,end_time,ksink):
print "Prefix is %s" % prefix
min_timestamp = 9999999999.9
results_list = glob.glob(prefix+"*.out")
for result_file in results_list:
basename = result_file.replace(".out","")
raw_results = open(result_file,"r")
# print "Processing file %s" % basename
first_timestamp, last_timestamp = process_result(basename,
raw_results,
end_time,
ksink)
if (first_timestamp != 0) and (last_timestamp != 0):
# we have to check each time because we may not be processing
# the individual results files in order
min_timestamp = min(min_timestamp,first_timestamp)
# OK, now we get the massaged results
add_to_ksink(basename,first_timestamp,last_timestamp,ksink)
# print "For %s min_timestamp is %s" % (prefix, min_timestamp)
return min_timestamp
def generate_overall(prefix,start_time,end_time,ksink):
overall = prefix + "_overall"
open_rrd(overall,start_time-1,end_time,1)
# print "Starting time %s ending time %s" % (start_time,end_time)
# one cannot rely on the enumeration of a dictionary being in key
# order and I do not know how to sort one, so we will simply walk
# the possible keys based on the start_time and end_time and if we
# find that key in the kitchen sink, we will use the value to
# update the overall rrd.
prevkey = -1
for key in xrange(int(start_time),int(end_time)+1):
if key in ksink:
try:
update_rrd(overall,ksink[key],key)
prevkey = key
except Exception as e:
print "Update to %s failed for %d, previous %d %s" % (overall, key, prevkey, e)
def overall_min_max_avg(prefix,start_time,end_time,intervals):
max_average = 0.0
min_graph_interval = 60
length = int(end_time) - int(start_time)
# (iavg, imin, imax, istart, iend)
# first will be overwritten with peak when known
results_list=[(0.0, 0.0, 0.0, 0, 0)]
rrdtool.create(prefix + "_intervals.rrd",
'--step', '1',
'--start', str(int(start_time) - 1),
'DS:avg:GAUGE:1:U:U', 'RRA:AVERAGE:0.5:1:%d' % int(length),
'DS:min:GAUGE:1:U:U', 'RRA:AVERAGE:0.5:1:%d' % int(length),
'DS:max:GAUGE:1:U:U', 'RRA:AVERAGE:0.5:1:%d' % int(length))
for id, interval in enumerate(intervals,start=1):
# something to customize the x-axis labling
graph_interval = interval[1] - interval[0]
if (graph_interval > 0 and graph_interval < min_graph_interval):
min_graph_interval = graph_interval
start = interval[0] + 1
# take care if there was a long delay between when we started
# netperf and when we started getting results out of it.
if (start < start_time):
start = int(start_time + 1)
end = interval[1] - 1
# if we have a bogus interval, skip it
if (start >= end):
continue
# we have no interest in the size of the graph (the first two
# items in the list) so slice just the part of interest
result = rrdtool.graph('/dev/null',
'--start', str(start),
'--end', str(end),
'DEF:foo=%s_overall.rrd:mbps:AVERAGE' % prefix,
'VDEF:avg=foo,AVERAGE',
'VDEF:min=foo,MINIMUM',
'VDEF:max=foo,MAXIMUM',
'PRINT:avg:"%6.2lf"',
'PRINT:min:"%6.2lf"',
'PRINT:max:"%6.2lf"')[2]
# print "from %d to %d iavg, imin, imax are %s" % (start,end,result)
iavg = float(result[0].strip('"'))
imin = float(result[1].strip('"'))
imax = float(result[2].strip('"'))
results_list.append((iavg, imin, imax, start, end))
for time in xrange(start,end+1):
rrdtool.update(prefix + "_intervals.rrd",
'%d:%f:%f:%f' % (time, iavg, imin, imax))
if iavg > max_average:
peak_interval_id = id;
peak_interval_start = start
peak_interval_end = end
max_average = iavg
max_minimum = imin
max_maximum = imax
results_list[0]= (max_average, max_minimum, max_maximum, peak_interval_start, peak_interval_end)
return peak_interval_id, min_graph_interval, results_list
def units_et_al_by_prefix(prefix):
units = "bits/s"
multiplier = "1000000"
direction = "Bidirectional"
if ("pps" in prefix) or ("tps" in prefix):
units = "Trans/s"
multiplier = "1"
elif "inbound" in prefix:
direction = "Inbound"
elif "outbound" in prefix:
direction = "Outbound"
return units, multiplier, direction
def graph_overall(prefix,start_time,end_time,vrules,peak_interval_id=None,peak_average=0.0,major_interval=60,annotation=None,override=None):
length = int(end_time) - int(start_time)
xgrid_setting = 'SECOND:%d:SECOND:%d:SECOND:%d:0:%%X' % (major_interval/2, major_interval, major_interval)
units, multiplier, direction = units_et_al_by_prefix(prefix)
# print units,multiplier,direction
# print "Vrules",vrules
interval_specs = []
if peak_interval_id:
interval_specs = [ 'DEF:bar=%s_intervals.rrd:avg:AVERAGE' % prefix,
'CDEF:intvl=bar,%s,*' % multiplier,
'LINE2:intvl#0F0F0F40:Interval average. Peak of %.3f during interval %d' % (peak_average, peak_interval_id) ]
title = "Overall %s" % (override if override else prefix)
if annotation:
title = "Overall %s %s" % ((override if override else prefix), annotation)
rrdtool.graph(prefix + "_overall.svg", '--imgformat', 'SVG',
'--start', str(int(start_time)),
'--end', str(int(end_time)),
'-w','%d' % max(800,length),'-h','400',
'--right-axis', '1:0',
'--x-grid', xgrid_setting,
vrules,
'--font', 'DEFAULT:0:Helvetica',
'-t', title,
'-v', '%s %s' % (direction,units),
'DEF:foo=%s_overall.rrd:mbps:AVERAGE' % prefix,
'CDEF:bits=foo,%s,*' % multiplier,
'LINE2:bits#00FF0080:%s' % units,
interval_specs)
def graph_individual(prefix,start_time,end_time,vrules,major_interval=60,annotation=None,override=None):
units, multiplier, direction = units_et_al_by_prefix(prefix)
length = int(end_time) - int(start_time)
for individual in glob.glob(prefix+"*.rrd"):
basename = individual.strip(".rrd")
title = "%s %s" % (basename,(override if override else prefix))
if annotation:
title = "%s %s %s" % (basename, (override if override else prefix), annotation)
try:
rrdtool.graph(basename + ".svg",
'--imgformat','SVG',
'--start', str(int(start_time)),
'--end', str(int(end_time)),
'--font', 'DEFAULT:0:Helvetica',
'-w', '%d' % max(800,length), '-h', '400',
'--right-axis', '1:0',
vrules,
'-t', title,
'-v', '%s %s' % (direction, units),
'DEF:foo=%s.rrd:mbps:AVERAGE' % basename,
'CDEF:bits=foo,%s,*' % multiplier,
'LINE2:bits#00FF0080:%s' % units)
except:
# at some point we should make certain this was for the
# "intervals" rrd file but until then just pass
pass
def setup_parser() :
parser = argparse.ArgumentParser()
parser.add_argument("-i", "--individual", action='store_true',
default=False,
help="Generate graphs of individual tests")
parser.add_argument("-I", "--intervals", action='store_true',
default=False,
help="Emit the results for all intervals, not just peak")
parser.add_argument("-a", "--annotation",default=None,
help="Annotation to add to chart titles")
parser.add_argument("-t", "--title", default=None,
help="String to use for chart title. Default based on test")
parser.add_argument('filename')
return parser
if __name__ == '__main__':
parser = setup_parser()
args = parser.parse_args()
filename = args.filename
prefix = filename.replace(".log","")
source = open(filename,"r")
vrules,start_time,end_time,intervals = find_vrules(source)
#print vrules
# at one point for some reason I thought one could not add to a
# dict on the fly, which of course I now know is silly, but for
# the time being I will preallocate the entire dict in one fell
# swoop until I can modify add_to_ksink() accordingly
length = int(end_time + 1) - int(start_time)
ksink=dict(zip(xrange(int(start_time),
int(end_time)+1),
[0.0] * length))
min_timestamp = process_result_files(prefix,start_time,end_time,ksink)
if min_timestamp == 9999999999.9:
print "There were no valid results for this prefix!"
exit()
# print "Min timestamp for %s is %s start time is %s end_time is %s" % (prefix,min_timestamp,start_time,end_time)
generate_overall(prefix,min_timestamp-2,end_time,ksink)
peak_interval_id, min_graph_interval, results_list = overall_min_max_avg(prefix,min_timestamp,end_time,intervals)
peak_average = results_list[0][0]
peak_minimum = results_list[0][1]
peak_maximum = results_list[0][2]
peak_start = results_list[0][3]
peak_end = results_list[0][4]
graph_overall(prefix, min_timestamp, end_time, vrules, peak_interval_id,
peak_average, major_interval=min_graph_interval,
annotation=args.annotation, override=args.title)
if args.individual:
graph_individual(prefix, min_timestamp, end_time, vrules,
major_interval=min_graph_interval,
annotation=args.annotation,override=args.title)
units, multiplier, direction = units_et_al_by_prefix(prefix)
print "Average of peak interval is %.3f %s from %d to %d" % (results_list[0][0] * float(multiplier), units, peak_start, peak_end)
print "Minimum of peak interval is %.3f %s from %d to %d" % (peak_minimum * float(multiplier), units, peak_start, peak_end)
print "Maximum of peak interval is %.3f %s from %d to %d" % (peak_maximum * float(multiplier), units, peak_start, peak_end)
if args.intervals:
for id, interval in enumerate(results_list[1:]):
print "Average of interval %d is %.3f %s from %d to %d" % (id, interval[0] * float(multiplier), units, interval[3], interval[4])
print "Minimum of interval %d is %.3f %s from %d to %d" % (id, interval[1] * float(multiplier), units, interval[3], interval[4])
print "Maximum of interval %d is %.3f %s from %d to %d" % (id, interval[2] * float(multiplier), units, interval[3], interval[4])
|