1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
|
from __future__ import print_function
import sys
import os
import timeit
import math
import argparse
import fnmatch
import json
parser = argparse.ArgumentParser(description="Python protobuf benchmark")
parser.add_argument("data_files", metavar="dataFile", nargs="+",
help="testing data files.")
parser.add_argument("--json", action="store_const", dest="json",
const="yes", default="no",
help="Whether to output json results")
parser.add_argument("--behavior_prefix", dest="behavior_prefix",
help="The output json format's behavior's name's prefix",
default="")
# BEGIN CPP GENERATED MESSAGE
parser.add_argument("--cpp_generated", action="store_const",
dest="cpp_generated", const="yes", default="no",
help="Whether to link generated code library")
# END CPP GENERATED MESSAGE
args = parser.parse_args()
# BEGIN CPP GENERATED MESSAGE
# CPP generated code must be linked before importing the generated Python code
# for the descriptor can be found in the pool
if args.cpp_generated != "no":
sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/.libs" )
import libbenchmark_messages
sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/tmp" )
# END CPP GENERATED MESSAGE
import datasets.google_message1.proto2.benchmark_message1_proto2_pb2 as benchmark_message1_proto2_pb2
import datasets.google_message1.proto3.benchmark_message1_proto3_pb2 as benchmark_message1_proto3_pb2
import datasets.google_message2.benchmark_message2_pb2 as benchmark_message2_pb2
import datasets.google_message3.benchmark_message3_pb2 as benchmark_message3_pb2
import datasets.google_message4.benchmark_message4_pb2 as benchmark_message4_pb2
import benchmarks_pb2 as benchmarks_pb2
def run_one_test(filename):
data = open(filename, "rb").read()
benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
benchmark_dataset.ParseFromString(data)
total_bytes = 0
for payload in benchmark_dataset.payload:
total_bytes += len(payload)
benchmark_util = Benchmark(full_iteration=len(benchmark_dataset.payload),
module="py_benchmark",
setup_method="init",
total_bytes=total_bytes)
result={}
result["filename"] = filename
result["message_name"] = benchmark_dataset.message_name
result["benchmarks"] = {}
benchmark_util.set_test_method("parse_from_benchmark")
result["benchmarks"][args.behavior_prefix + "_parse_from_benchmark"] = \
benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename))
benchmark_util.set_test_method("serialize_to_benchmark")
result["benchmarks"][args.behavior_prefix + "_serialize_to_benchmark"] = \
benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename))
return result
def init(filename):
global benchmark_dataset, message_class, message_list, counter, total_bytes
message_list=[]
counter = 0
total_bytes = 0
data = open(filename, "rb").read()
benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
benchmark_dataset.ParseFromString(data)
if benchmark_dataset.message_name == "benchmarks.proto3.GoogleMessage1":
message_class = benchmark_message1_proto3_pb2.GoogleMessage1
elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage1":
message_class = benchmark_message1_proto2_pb2.GoogleMessage1
elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage2":
message_class = benchmark_message2_pb2.GoogleMessage2
elif benchmark_dataset.message_name == "benchmarks.google_message3.GoogleMessage3":
message_class = benchmark_message3_pb2.GoogleMessage3
elif benchmark_dataset.message_name == "benchmarks.google_message4.GoogleMessage4":
message_class = benchmark_message4_pb2.GoogleMessage4
else:
raise IOError("Message %s not found!" % (benchmark_dataset.message_name))
for one_payload in benchmark_dataset.payload:
temp = message_class()
temp.ParseFromString(one_payload)
message_list.append(temp)
total_bytes += len(one_payload)
def parse_from_benchmark():
global counter, message_class, benchmark_dataset
m = message_class().ParseFromString(benchmark_dataset.payload[counter % len(benchmark_dataset.payload)])
counter = counter + 1
def serialize_to_benchmark():
global counter, message_list, message_class
s = message_list[counter % len(benchmark_dataset.payload)].SerializeToString()
counter = counter + 1
class Benchmark:
def __init__(self, module=None, test_method=None,
setup_method=None, total_bytes=None, full_iteration = 1):
self.full_iteration = full_iteration
self.module = module
self.test_method = test_method
self.setup_method = setup_method
self.total_bytes = total_bytes
def set_test_method(self, test_method):
self.test_method = test_method
def full_setup_code(self, setup_method_args=''):
setup_code = ""
setup_code += "from %s import %s\n" % (self.module, self.test_method)
setup_code += "from %s import %s\n" % (self.module, self.setup_method)
setup_code += "%s(%s)\n" % (self.setup_method, setup_method_args)
return setup_code
def dry_run(self, test_method_args='', setup_method_args=''):
return timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args),
setup=self.full_setup_code(setup_method_args),
number=self.full_iteration);
def run_benchmark(self, test_method_args='', setup_method_args=''):
reps = self.full_iteration;
t = self.dry_run(test_method_args, setup_method_args);
if t < 3 :
reps = int(math.ceil(3 / t)) * self.full_iteration
if reps != self.full_iteration:
t = timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args),
setup=self.full_setup_code(setup_method_args),
number=reps);
return self.total_bytes * 1.0 / 2 ** 20 / (1.0 * t / reps * self.full_iteration)
if __name__ == "__main__":
results = []
for file in args.data_files:
results.append(run_one_test(file))
if args.json != "no":
print(json.dumps(results))
else:
for result in results:
print("Message %s of dataset file %s" % \
(result["message_name"], result["filename"]))
print("Average throughput for parse_from_benchmark: %.2f MB/s" % \
(result["benchmarks"][ \
args.behavior_prefix + "_parse_from_benchmark"]))
print("Average throughput for serialize_to_benchmark: %.2f MB/s" % \
(result["benchmarks"][ \
args.behavior_prefix + "_serialize_to_benchmark"]))
print("")
|