File: doc_list_index.cpp

package info (click to toggle)
libsdsl 2.1.1%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,992 kB
  • sloc: cpp: 42,286; makefile: 1,171; ansic: 318; sh: 201; python: 27
file content (84 lines) | stat: -rw-r--r-- 2,629 bytes parent folder | download | duplicates (18)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#include "doc_list_index.hpp"
#include <algorithm>
#include <chrono>
#include <iostream>
#include <string>

using namespace std;
using namespace sdsl;

// Concrete index type exercised by this driver.
// NOTE(review): IDX_TYPE is not defined in this file; presumably it is a macro
// supplied by the build (e.g. -DIDX_TYPE=...) or by doc_list_index.hpp — confirm.
using idx_type = IDX_TYPE;

int main(int argc, char* argv[])
{
    if (argc < 2) {
        cout << "Usage: " << argv[0] << " collection_file" << endl;
        return 1;
    }
    string collection_file = string(argv[1]);
    idx_type idx;
    string idx_file = collection_file + SDSL_XSTR(IDX_SUF);
    cout<<"idx_file="<<idx_file<<endl;


    using timer = std::chrono::high_resolution_clock;

    if (!load_from_file(idx, idx_file)) {
        cout << "Generate index for " << collection_file << endl;
        {
            auto start = timer::now();
            idx_type temp_idx(collection_file, 1);
            auto stop = timer::now();
            auto elapsed = stop-start;
            std::cout << "construction time = " << std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count() << std::endl;
            store_to_file(temp_idx, idx_file);
            ofstream out(idx_file+".html");
            write_structure<HTML_FORMAT>(temp_idx, out);
        }
        load_from_file(idx, idx_file);
    } else {
        cout << "Loaded index from " << collection_file << endl;
    }

    using timer = std::chrono::high_resolution_clock;
    char   buffer[64];
    size_t q_len = 0;
    size_t q_cnt = 0;
    size_t sum = 0;
    size_t sum_fdt = 0;
    auto start = timer::now();
    while (cin.getline(buffer, 64)) {
        typename idx_type::result res;
        string query(buffer);
        if (q_len !=  query.size()) {
            if (q_len == 0) {
                start = timer::now();
            } else {
                auto stop = timer::now();
                auto elapsed = stop-start;
                cout<<q_len<<" "
                    <<std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count()/q_cnt << endl;


                start = timer::now();
                q_cnt = 0;
            }
            q_len = query.size();
        }
        ++q_cnt;
        size_t x = idx.search(query.begin(), query.end(), res, 10);
        sum += x;
        for (auto& r : res) {
            sum_fdt += r.second;
//            cout << " " << r.first << " " << r.second << endl;
        }
//        if (res.size()==0){
//            cout<<" empty"<<endl;
//        }
//        cout << " (" << x << ")"<< endl;
    }
    auto stop = timer::now();
    auto elapsed = stop-start;
    std::cout<<q_len<<" "<<std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count()/q_cnt << std::endl;

    cerr << "sum = " << sum << " sum f_dt = " << sum_fdt << endl;
}