// See the file "COPYING" in the main distribution directory for copyright.
#include "benchmark.h"
#include <random>
#include <memory>
// Shared random-number state for the workload generators in this file:
//   dev  - std::random_device, used once to seed the engine below
//   rng  - Mersenne Twister engine seeded with a single value drawn from dev
//   dist - uniform integer distribution over the closed range [0, RAND_MAX]
std::random_device dev{};
std::mt19937 rng{dev()};
std::uniform_int_distribution<std::mt19937::result_type> dist{0, RAND_MAX};
// Draws the next value from the file-scope distribution `dist`, driven by
// the file-scope engine `rng`, and returns it as an int.
int rand_int()
{
    const auto drawn = dist(rng);
    return static_cast<int>(drawn);
}
// Fixed vocabulary of literal words. random_pattern_word() selects entries
// from this table by index.
static const char* benchmark_pattern_words[] = {
    "aaaaaa",
    "bb",
    "cccccccccccccccc",
    "ddddd",
    "eeeeeeeee",
    "fffffffffffff",
    "gggg"
};
// Returns one entry of benchmark_pattern_words, selected by reducing a
// rand_int() draw modulo the number of entries in the table.
const char* random_pattern_word()
{
    const auto word_count = sizeof(benchmark_pattern_words) / sizeof(benchmark_pattern_words[0]);
    const auto pick = rand_int() % word_count;
    return benchmark_pattern_words[pick];
}
// Produces a NUL-terminated word of 5 to 29 characters, each a lowercase
// letter 'a'..'z' chosen via rand_int().
//
// The returned pointer refers to a function-local static buffer, so every
// call overwrites the word produced by the previous call; copy the result
// if it has to outlive the next call.
const char* random_word()
{
    static char word[1024];

    const int length = 5 + (rand_int() % 25);

    int pos = 0;
    while ( pos < length ) {
        word[pos] = static_cast<char>('a' + (rand_int() % 26));
        ++pos;
    }
    word[pos] = '\0';

    return word;
}
// String-argument front end for benchmark_n().
//
// a, b and c are converted with atol() and passed on as, respectively, the
// number of patterns, the number of queries and the match probability.
// silent is forwarded unchanged. Returns whatever benchmark_n() returns.
double benchmark(char* a, char* b, char* c, bool silent)
{
    return benchmark_n(atol(a), atol(b), atol(c), silent);
}
// Builds one random glob pattern: between 2 and 11 words joined by '*'.
// Each word comes from random_pattern_word() when a rand_int() draw modulo 10
// is zero, and from random_word() otherwise.
static std::string make_benchmark_pattern()
{
    std::string pattern;
    const int rounds = (rand_int() % 10) + 2;
    for ( int j = 0; j < rounds; j++ ) {
        if ( j != 0 )
            pattern += '*';
        if ( (rand_int() % 10) == 0 )
            pattern += random_pattern_word();
        else
            // random_word() returns a static buffer; appending copies it
            // before the next call can overwrite it.
            pattern += random_word();
    }
    return pattern;
}

// Builds one query string. Depending on a rand_int() draw compared against
// match_prob, the query is either a likely match candidate (1 to 5 words from
// random_pattern_word(), concatenated) or a mismatch (5 to 54 random
// lowercase letters).
static std::string make_benchmark_query(long match_prob)
{
    std::string query;
    // NOTE(review): the draw is in 0..99 and the comparison is `<=`, so the
    // match branch is taken for (match_prob + 1) out of 100 draws, e.g. 1%
    // of the time for match_prob == 0. Kept as-is so the generated workload
    // stays comparable with earlier runs; confirm whether `<` was intended.
    if ( (rand_int() % 100) <= match_prob ) {
        // Create a likely match candidate.
        const int rounds = (rand_int() % 5) + 1;
        for ( int j = 0; j < rounds; j++ )
            query += random_pattern_word();
    }
    else {
        // Create a mismatch.
        const int rounds = (rand_int() % 50) + 5;
        for ( int j = 0; j < rounds; j++ )
            query.push_back(static_cast<char>((rand_int() % 26) + 'a'));
    }
    return query;
}

// Generates a random workload, builds a paraglob from it and runs the
// queries against it.
//
// num_patterns: number of patterns to generate and add to the paraglob.
// num_queries:  number of query strings to generate and look up.
// match_prob:   threshold (compared against a draw in 0..99) controlling how
//               many queries are built as likely match candidates.
// silent:       when false, workload parameters, progress messages and
//               timing results are written to std::cout.
//
// Returns the build time plus the search time, in seconds. Workload
// generation is not part of either measurement.
//
// Both counts are passed straight to new[], so callers are expected to pass
// non-negative values.
double benchmark_n(long num_patterns, long num_queries, long match_prob, bool silent) {
    if (!silent) {
        std::cout << "creating workload:\n";
        std::cout << "\t# patterns: " << num_patterns << "\n";
        std::cout << "\t# queries: " << num_queries << "\n";
        std::cout << "\t% matches: " << match_prob << "\n";
    }

    // Create the patterns. Loop counters are long to match the bounds, so
    // counts above INT_MAX do not overflow the index.
    std::unique_ptr<std::string[]> patterns(new std::string[num_patterns]);
    for ( long i = 0; i < num_patterns; i++ )
        patterns[i] = make_benchmark_pattern();

    // Create the queries. The strings are built directly as std::string, so
    // no intermediate C allocation is made (and none is leaked).
    std::unique_ptr<std::string[]> queries(new std::string[num_queries]);
    for ( long i = 0; i < num_queries; i++ )
        queries[i] = make_benchmark_query(match_prob);

    if (!silent) {
        std::cout << "creating paraglob \n";
    }

    // Timed region 1: adding every pattern and compiling.
    auto build_start = std::chrono::high_resolution_clock::now();
    paraglob::Paraglob myGlob;
    for ( long i = 0; i < num_patterns; ++i ) {
        const auto& p = patterns[i];
        myGlob.add(p);
    }
    myGlob.compile();
    auto build_finish = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> build_time = build_finish - build_start;

    // Emit the progress message before starting the clock so that console
    // I/O is not counted as search time.
    if (!silent) {
        std::cout << "making queries \n";
    }

    // Timed region 2: running every query.
    auto start = std::chrono::high_resolution_clock::now();
    for ( long i = 0; i < num_queries; ++i ) {
        const auto& q = queries[i];
        myGlob.get(q);
    }
    auto finish = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed = finish - start;

    if (!silent) {
        std::cout << "Build time: " << build_time.count() << "s\n";
        std::cout << "Search time: " << elapsed.count() << " s\n";
        std::cout << "Queries/second: " << num_queries/elapsed.count() << "\n";
    }

    return elapsed.count() + build_time.count();
}
// Prints data to the console for generating a 3d plot of paraglob
// performance:
//   x axis - number of patterns (500 to 10000, in steps of 500)
//   y axis - number of queries  (1000 to 20000, in steps of 1000)
//   z axis - time taken to build and run the queries, as returned by
//            benchmark_n() in silent mode with a match probability of 10
// Each pattern count produces one line of the form "{ v1, v2, ... },".
void makeGraphData()
{
    const long pattern_step = 500;
    const long pattern_limit = 10000;
    const long query_step = 1000;
    const long query_limit = 20000;

    for ( long pattern_count = pattern_step; pattern_count <= pattern_limit; pattern_count += pattern_step ) {
        std::cout << "{ ";
        for ( long query_count = query_step; query_count <= query_limit; query_count += query_step ) {
            std::cout << benchmark_n(pattern_count, query_count, 10, true);
            // Every value except the last one in the row is followed by a separator.
            const bool more_to_come = query_count < query_limit;
            if ( more_to_come )
                std::cout << ", ";
        }
        std::cout << "},\n";
    }
}
|