File: benchmark.cpp

package info (click to toggle)
paraglob 0.6.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, sid, trixie
  • size: 308 kB
  • sloc: ansic: 1,116; cpp: 566; sh: 78; makefile: 9
file content (164 lines) | stat: -rw-r--r-- 4,296 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
// See the file "COPYING" in the main distribution directory for copyright.

#include "benchmark.h"
#include <random>
#include <memory>

// Random number source shared by the helpers in this file: `dev` supplies
// the seed, `rng` is the Mersenne Twister engine seeded from it when these
// globals are initialized, and `dist` maps engine output onto the closed
// integer range [0, RAND_MAX].
std::random_device dev;
std::mt19937 rng(dev());
std::uniform_int_distribution<std::mt19937::result_type> dist(0,RAND_MAX);

// Returns a pseudo-random integer in [0, RAND_MAX], drawn from the
// file-level engine `rng` through the distribution `dist`.
int rand_int()
  {
  const auto drawn = dist(rng);
  return static_cast<int>(drawn);
  }

// Fixed vocabulary of literal words. random_pattern_word() picks entries from
// this table; benchmark_n() uses those picks both inside generated patterns
// and to assemble queries that are likely to match.
static const char* benchmark_pattern_words[] = {
    "aaaaaa", "bb", "cccccccccccccccc", "ddddd", "eeeeeeeee", "fffffffffffff", "gggg"
};

// Picks one entry of benchmark_pattern_words at random and returns it.
// The returned pointer refers to a string literal in that table, so the
// caller must neither modify nor free it.
const char* random_pattern_word()
{
    const auto word_count = sizeof(benchmark_pattern_words) / sizeof(benchmark_pattern_words[0]);
    const int pick = rand_int() % word_count;
    return benchmark_pattern_words[pick];
}

// Generates a random word of 5 to 29 lowercase ASCII letters.
//
// The word is written into a function-local static buffer, so the text behind
// the returned pointer is overwritten by the next call; callers that need to
// keep it must copy it first.
const char* random_word()
{
    static char word[1024];

    const int length = 5 + (rand_int() % 25);

    int pos = 0;
    while ( pos < length ) {
        word[pos] = static_cast<char>('a' + (rand_int() % 26));
        ++pos;
    }

    // pos == length here; terminate the C string.
    word[pos] = '\0';

    return word;
}

// String-argument entry point: converts the three numeric arguments with
// atol() and forwards them to benchmark_n().
//
//   a      - number of patterns, as decimal text
//   b      - number of queries, as decimal text
//   c      - match percentage, as decimal text
//   silent - passed through to benchmark_n() unchanged
//
// Returns the value produced by benchmark_n(). atol() performs no error
// reporting, so the arguments are not validated here.
double benchmark(char* a, char* b, char* c, bool silent) {
  const long pattern_count = atol(a);
  const long query_count = atol(b);
  const long match_percent = atol(c);

  return benchmark_n(pattern_count, query_count, match_percent, silent);
}

// Builds one random glob pattern: between 2 and 11 words joined by '*'.
// Each word is taken from the fixed vocabulary with probability 1/10 and is
// a freshly generated random word otherwise.
static std::string make_benchmark_pattern() {
  std::string pattern;

  const int rounds = (rand_int() % 10) + 2;
  for ( int j = 0; j < rounds; j++ ) {
    if ( j != 0 )
      pattern += '*';

    if ( (rand_int() % 10) == 0 )
      pattern += random_pattern_word();
    else
      pattern += random_word(); // copied immediately; random_word() reuses its buffer
  }

  return pattern;
}

// Builds one query string. With probability match_prob percent the query is
// a concatenation of 1 to 5 vocabulary words (a likely match); otherwise it
// is 5 to 54 random lowercase letters (a likely mismatch).
static std::string make_benchmark_query(long match_prob) {
  std::string query;

  // rand_int() % 100 is in [0, 99], so a strict '<' yields exactly
  // match_prob percent: 0 never takes this branch and 100 always does.
  if ( (rand_int() % 100) < match_prob ) {
    const int rounds = (rand_int() % 5) + 1;
    for ( int j = 0; j < rounds; j++ )
      query += random_pattern_word();
  }
  else {
    const int rounds = (rand_int() % 50) + 5;
    for ( int j = 0; j < rounds; j++ )
      query += static_cast<char>((rand_int() % 26) + 'a');
  }

  return query;
}

// Generates a random workload, builds a Paraglob from it, runs every query
// against it and reports the timings.
//
//   num_patterns - number of glob patterns to generate and add; expected to
//                  be non-negative
//   num_queries  - number of query strings to generate and look up; expected
//                  to be non-negative
//   match_prob   - percentage (0-100) of queries that are assembled from the
//                  fixed vocabulary instead of random letters
//   silent       - when true, nothing is written to std::cout
//
// Returns the build time plus the search time, in seconds. Workload
// generation is not part of the measured time.
double benchmark_n(long num_patterns, long num_queries, long match_prob, bool silent) {
  if (!silent) {
    std::cout << "creating workload:\n";
    std::cout << "\t# patterns: " << num_patterns << "\n";
    std::cout << "\t# queries: " << num_queries << "\n";
    std::cout << "\t% matches: " << match_prob << "\n";
  }

  // Create the patterns. Indices are long so they cover the full range of
  // the long counts.
  std::unique_ptr<std::string[]> patterns(new std::string[num_patterns]);
  for ( long i = 0; i < num_patterns; i++ )
    patterns[i] = make_benchmark_pattern();

  // Create the queries. Each string is stored directly; no intermediate
  // heap copy is made that would have to be freed.
  std::unique_ptr<std::string[]> queries(new std::string[num_queries]);
  for ( long i = 0; i < num_queries; i++ )
    queries[i] = make_benchmark_query(match_prob);

  if (!silent) {
    std::cout << "creating paraglob \n";
  }

  // Measure adding all patterns plus compiling.
  auto build_start = std::chrono::high_resolution_clock::now();
  paraglob::Paraglob myGlob;
  for ( long i = 0; i < num_patterns; ++i ) {
    const auto& p = patterns[i];
    myGlob.add(p);
  }
  myGlob.compile();
  auto build_finish = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> build_time = build_finish - build_start;

  // Print before starting the clock so console I/O is not counted as
  // search time.
  if (!silent) {
    std::cout << "making queries \n";
  }

  // Measure the lookups.
  auto start = std::chrono::high_resolution_clock::now();
  for ( long i = 0; i < num_queries; ++i ) {
    const auto& q = queries[i];
    myGlob.get(q);
  }
  auto finish = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> elapsed = finish - start;

  if (!silent) {
    std::cout << "Build time: " << build_time.count() << "s\n";
    std::cout << "Search time: " << elapsed.count() << " s\n";
    std::cout << "Queries/second: " << num_queries/elapsed.count() << "\n";
  }

  return elapsed.count() + build_time.count();
}

// Prints data to the console for generating a 3d plot of paraglob
// performance:
//   x axis - number of patterns (500 to 10000, step 500)
//   y axis - number of queries (1000 to 20000, step 1000)
//   z axis - time taken to build and run the queries
// Each pattern count produces one "{ v1, v2, ... }," row, with the match
// percentage fixed at 10 and benchmark_n() run in silent mode.
void makeGraphData() {
  const long pattern_step = 500;
  const long pattern_max = 10000;
  const long query_step = 1000;
  const long query_max = 20000;

  for (long pattern_count = pattern_step; pattern_count <= pattern_max; pattern_count += pattern_step) {
    std::cout << "{ ";

    long query_count = query_step;
    while (query_count <= query_max) {
      const double seconds = benchmark_n(pattern_count, query_count, 10, true);
      std::cout << seconds;

      // Separate values within a row; no separator after the last one.
      if (query_count < query_max) {
        std::cout << ", ";
      }

      query_count += query_step;
    }

    std::cout << "},\n";
  }
}