1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
|
#include "doc_list_index.hpp"
#include <iostream>
#include <chrono>
#include <algorithm>
using namespace std;
using namespace sdsl;
using idx_type = IDX_TYPE;
int main(int argc, char* argv[])
{
if (argc < 2) {
cout << "Usage: " << argv[0] << " collection_file" << endl;
return 1;
}
string collection_file = string(argv[1]);
idx_type idx;
string idx_file = collection_file + SDSL_XSTR(IDX_SUF);
cout<<"idx_file="<<idx_file<<endl;
using timer = std::chrono::high_resolution_clock;
if (!load_from_file(idx, idx_file)) {
cout << "Generate index for " << collection_file << endl;
{
auto start = timer::now();
idx_type temp_idx(collection_file, 1);
auto stop = timer::now();
auto elapsed = stop-start;
std::cout << "construction time = " << std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count() << std::endl;
store_to_file(temp_idx, idx_file);
ofstream out(idx_file+".html");
write_structure<HTML_FORMAT>(temp_idx, out);
}
load_from_file(idx, idx_file);
} else {
cout << "Loaded index from " << collection_file << endl;
}
using timer = std::chrono::high_resolution_clock;
char buffer[64];
size_t q_len = 0;
size_t q_cnt = 0;
size_t sum = 0;
size_t sum_fdt = 0;
auto start = timer::now();
while (cin.getline(buffer, 64)) {
typename idx_type::result res;
string query(buffer);
if (q_len != query.size()) {
if (q_len == 0) {
start = timer::now();
} else {
auto stop = timer::now();
auto elapsed = stop-start;
cout<<q_len<<" "
<<std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count()/q_cnt << endl;
start = timer::now();
q_cnt = 0;
}
q_len = query.size();
}
++q_cnt;
size_t x = idx.search(query.begin(), query.end(), res, 10);
sum += x;
for (auto& r : res) {
sum_fdt += r.second;
// cout << " " << r.first << " " << r.second << endl;
}
// if (res.size()==0){
// cout<<" empty"<<endl;
// }
// cout << " (" << x << ")"<< endl;
}
auto stop = timer::now();
auto elapsed = stop-start;
std::cout<<q_len<<" "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count()/q_cnt << std::endl;
cerr << "sum = " << sum << " sum f_dt = " << sum_fdt << endl;
}
|