// File: strsvsample05.cpp
//
// Package info: bmagic 6.3.0-1
//   - area: main
//   - in suites: bookworm, bullseye, sid, trixie
//   - size: 49,956 kB
//   - sloc: cpp: 84,298; ansic: 9,703; sh: 1,664; makefile: 742
// File content: 175 lines, 5,383 bytes
/*
Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

For more information please visit:  http://bitmagic.io
*/

/** \example strsvsample05.cpp

  Example of how to use bm::str_sparse_vector<> - succinct container for
  bit-transposed string collections for deserialization of only select elements
  from the serialized BLOB
 
  \sa bm::str_sparse_vector
  \sa bm::sparse_vector_deserializer
  \sa bm::sparse_vector_serializer

*/

/*! \file strsvsample05.cpp
    \brief Example: str_sparse_vector<> gather deserialization example
 
    This example deserializes only a selected range of elements from a
    serialized sparse vector (BLOB) to save memory and improve
    deserialization performance
*/

#include <iostream>
#include <string>
#include <vector>
#include <cstring>
#include <assert.h>

#include "bm.h"
#include "bmstrsparsevec.h"
#include "bmsparsevec_serial.h"


using namespace std;

/// Bit-vector type used as the building block of the sparse vector
using bvector_type = bm::bvector<>;

/// String sparse vector of char built on top of bvector_type.
/// NOTE(review): the third template argument (5) is presumably the maximum
/// string size including the terminating 0 - confirm against the
/// bm::str_sparse_vector<> documentation.
using str_sv_type = bm::str_sparse_vector<char, bvector_type, 5>;




int main(void)
{
    try
    {
       str_sv_type str_sv1;
       str_sv_type str_sv2;
       str_sv_type str_sv3;

       {
           str_sv_type str_sv0;
           // here we generate collection of k-mer (4-mer) strings
           // imitating a DNA sequence
           {
               auto bi = str_sv0.get_back_inserter();
               for (unsigned i = 0; i < 100000; ++i)
               {
                    bi = "ATGC";
                    bi = "GCTA";
                    bi = "GCAA";
                    bi = "TATA";
               } // for
           }
           str_sv1.remap_from(str_sv0); // SV1 now contains a remapped(smaller) copy of SV0
       }
       BM_DECLARE_TEMP_BLOCK(tb)
       str_sv1.optimize(tb);


        // calculate memory footprint
        //
        str_sv_type::statistics st;
        str_sv1.calc_stat(&st);

        cout << "Used memory: " << st.memory_used << std::endl;

        bm::sparse_vector_serial_layout<str_sv_type> sv_lay;

        // construct a serializer utility class, setup serialization parameters
        //
        // please note, use of "set_bookmarks()" to enable fast range
        // deserialization. Bookmarks somewhat increase the BLOB size but allow
        // more effeiciently skip parts which we would not need (paging) and
        // avoid decompression of blocks we would never need
        //
        // This example sets "128" as a bookmarks parameter, but you have to
        // experiment with what works for you, between 4 and 512
        //
        // Each block corresponds to 64K vector element
        // making bookmarks after each block does not make much sense
        // because decode is reasonably fast and some residual throw away
        // is usually ok.
        //
        bm::sparse_vector_serializer<str_sv_type> sv_serializer;
        sv_serializer.set_bookmarks(true, 128);


        // run str-vector serialization with compression
        //
        sv_serializer.serialize(str_sv1, sv_lay);

        const unsigned char* buf = sv_lay.buf();
        cout << "Serialized size = " << sv_lay.size() << endl;

        // instantiate deserializer utility class
        //
        bm::sparse_vector_deserializer<str_sv_type> sv_deserial;


        bvector_type::size_type from = 100000;
        bvector_type::size_type to = from + 65536;
        {
            // 1.
            // one way to deserialize is to provide a mask vector
            // specifying which sparse vector elements needs to be
            // decompressed from the BLOB
            // mask vector does not necessarily has to be just one range
            //
            bvector_type bv_mask;
            bv_mask.set_range(from, to);
            sv_deserial.deserialize(str_sv2, buf, bv_mask);


            // 2.
            // If it is just one range (common use case for paging)
            // it is faster and cleaner to use deserialize_range().
            // It will produce the same result as with (1) just faster.
            //
            sv_deserial.deserialize_range(str_sv3, buf, from, to);

            // run a quick comparison, that selected range matches values in
            // the container str_sv2, str_sv3
            //
            char s1[16]; char s2[16]; char s3[16];
            for (bvector_type::size_type j = from; j < to; ++j)
            {
                str_sv1.get(j, s1, sizeof(s1));
                str_sv2.get(j, s2, sizeof(s2));
                str_sv3.get(j, s3, sizeof(s3));

                int cmp;
                cmp = ::strcmp(s1, s2);
                assert(cmp==0);
                cmp = ::strcmp(s1, s3);
                assert(cmp==0);

            } // for j
            cout << "Gather deserialization check OK" << endl;
        }

    }
    catch(std::exception& ex)
    {
        std::cerr << ex.what() << std::endl;
        return 1;
    }
    

    return 0;
}