File: parallel_splitting.h

package info (click to toggle)
seqan2 2.4.0%2Bdfsg-11
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 223,504 kB
  • sloc: cpp: 256,886; ansic: 91,672; python: 8,339; sh: 995; xml: 570; makefile: 251; awk: 51
file content (320 lines) | stat: -rw-r--r-- 10,926 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
// ==========================================================================
//                 SeqAn - The Library for Sequence Analysis
// ==========================================================================
// Copyright (c) 2006-2018, Knut Reinert, FU Berlin
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in the
//       documentation and/or other materials provided with the distribution.
//     * Neither the name of Knut Reinert or the FU Berlin nor the names of
//       its contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
// DAMAGE.
//
// ==========================================================================
// Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>
// Author: David Weese <david.weese@fu-berlin.de>
// ==========================================================================
// Utility macros for parallelism.
// ==========================================================================

#ifndef SEQAN_PARALLEL_PARALLEL_SPLITTING_H_
#define SEQAN_PARALLEL_PARALLEL_SPLITTING_H_

namespace seqan {

struct Equidistant_;
typedef Tag<Equidistant_> Equidistant;

/*!
 * @class Splitter
 * @headerfile <seqan/parallel.h>
 * @brief Splits an interval into subintervals.
 *
 * @signature template <typename TValue, typename TSpec>
 *            class Splitter;
 *
 * @tparam TValue Type of the interval boundaries.
 * @tparam TSpec  Tag to select the way the values are sampled.
 *
 * This class divides an interval into the disjoint union of subintervals and enumerates its boundaries.  It can be used
 * to parallelize large for-loops that iterate over a contiguous range of elements.  The interval and the number of
 * subintervals can be set in the constructor @link Splitter::Splitter @endlink.  @link Splitter#length @endlink and
 * @link Splitter#resize @endlink can be used to retrieve or change the number of subintervals later.  In contrast to
 * other containers the Splitter allows one to access one more element than its length would imply to allow to retrieve the
 * right boundary of each subinterval (see example code below).
 *
 * @section Examples
 *
 * Simple example for equidistant (default) splitting.
 *
 * @include demos/dox/parallel/splitter_example.cpp
 *
 * The output is:
 *
 * @include demos/dox/parallel/splitter_example.cpp.stdout
 */

/*!
 * @fn Splitter::Splitter
 * @brief Constructor
 *
 * @signature Splitter::Splitter(beginPos, endPos[, subintervalCount]);
 * @signature Splitter::Splitter(beginPos, endPos, parallelTag);
 *
 * @param[in] beginPos         Left interval boundary.
 * @param[in] endPos           Right interval boundary.
 * @param[in] subintervalCount Number of subintervals.  @link Splitter#length @endlink and @link Splitter#resize
 *                             @endlink can be used to retrieve or change the number of subintervals later.
 *                             Default: The minimum of interval size and the number of available threads (returned by
 *                             <tt>omp_get_max_threads()</tt>).
 * @param[in] parallelTag      Tag to generically enable/disable parallelism.  If its type is <tt>Parallel</tt>, the
 *                             default number of subintervals is used.  If it is <tt>Serial</tt>, only one subinterval
 *                             is used.  This tag should be used to write generic parallel algorithms and to switch
 *                             between parallel and serial variants.
 */

/*!
 * @class Equidistant Splitter
 * @extends Splitter
 * @headerfile <seqan/parallel.h>
 * @brief Splits an interval into equal-sized subintervals.
 *
 * @signature template <typename TValue>
 *            class Splitter<TValue, Equidistant>;
 *
 * @tparam TValue Type of the interval boundaries.
 *
 * This @link Splitter @endlink specialization divides an interval into subintervals of (almost) equal length, i.e. two
 * subintervals differ by at most 1 in size.
 *
 * @section Examples
 *
 * Simple example for equidistant splitting.
 *
 * @include demos/dox/parallel/splitter_example.cpp
 *
 * Output:
 *
 * @code
 * [10,14)
 * [14,17)
 * [17,20)
 * @endcode
 */


template <typename TValue, typename TSpec = Equidistant>
class Splitter
{
public:
    typedef typename Size<Splitter>::Type TSize;

    TValue beginPos;
    TSize subintervalCount;
    TSize blockLength;
    TSize rest;

    Splitter(TValue beginPos_, TValue endPos):
        beginPos(beginPos_)
    {
        // we choose the counts automatically and don't want to have empty jobs
        _resize(*this, endPos - beginPos, _min((TSize)(endPos - beginPos), (TSize)omp_get_max_threads()));
    }

    Splitter(TValue beginPos_, TValue endPos, Parallel):
        beginPos(beginPos_)
    {
        // we choose the counts automatically and don't want to have empty jobs
        _resize(*this, endPos - beginPos, _min((TSize)(endPos - beginPos), (TSize)omp_get_max_threads()));
    }

    Splitter(TValue beginPos_, TValue endPos, Serial):
        beginPos(beginPos_)
    {
        // we produce at most 1 job (or none if interval is empty)
        _resize(*this, endPos - beginPos, _min((TSize)(endPos - beginPos), (TSize)1));
    }

    Splitter(TValue beginPos_, TValue endPos, TSize subintervalCount):
        beginPos(beginPos_)
    {
        _resize(*this, endPos - beginPos, subintervalCount);
    }

    TValue operator[] (TSize i) const
    {
        SEQAN_ASSERT_LEQ_MSG(i, subintervalCount, "Trying to access an element behind the last one!");
        return beginPos + blockLength * i + std::min(i, rest);
    }
};

/*!
 * @mfn Splitter#Size
 * @brief Returns the size type for the Splitter.
 *
 * @signature Size<TSplitter>::Type;
 *
 * @tparam TSplitter The Splitter to query for its size type.
 */

template <typename TValue, typename TSpec>
struct Size<Splitter<TValue, TSpec> >
{
    typedef typename MakeUnsigned<typename Difference<TValue>::Type>::Type Type;
};

/*!
 * @mfn Splitter#Value
 * @brief Returns the value type for the Splitter.
 *
 * @signature Value<TSplitter>::Type;
 *
 * @tparam TSplitter The Splitter to query for its value type.
 */

template <typename TValue, typename TSpec>
struct Value<Splitter<TValue, TSpec> >
{
    typedef TValue Type;
};

/*!
 * @fn Splitter#length
 * @brief Return the number of elements in the splitter.
 *
 * @signature TSize length(splitter);
 *
 * @param[in] splitter The Splitter object to query.
 *
 * @return TSize The number of elements in the Splitter, <tt>TSize</tt> is the size type.
 */

template <typename TValue, typename TSpec>
inline typename Size<Splitter<TValue, TSpec> >::Type
length(Splitter<TValue, TSpec> const &splitter)
{
    return splitter.subintervalCount;
}

/*!
 * @fn Splitter#resize
 * @brief Change the number of elements in the splitter.
 *
 * @signature void resize(splitter, newCount);
 *
 * @param[out] splitter The Splitter object to update.
 * @param[in]  newCount The updated number of elements in the splitter.
 */

template <typename TValue, typename TSpec, typename TSize1, typename TSize2>
inline void
_resize(Splitter<TValue, TSpec> &splitter, TSize1 intervalLen, TSize2 newCount)
{
    if (newCount != 0)
    {
        splitter.blockLength = intervalLen / newCount;
        splitter.rest = intervalLen % newCount;
    }
    else
    {
        splitter.blockLength = 0;
        splitter.rest = intervalLen;
    }
    splitter.subintervalCount = newCount;
}

template <typename TValue, typename TSpec, typename TSize>
inline typename Size<Splitter<TValue, TSpec> >::Type
resize(Splitter<TValue, TSpec> &splitter, TSize newCount)
{
    _resize(splitter, splitter.blockLength * splitter.subintervalCount + splitter.rest, newCount);
    return newCount;
}

// --------------------------------------------------------------------------
// Function computeSplitters()
// --------------------------------------------------------------------------

/*!
 * @fn computeSplitters
 * @headerfile <seqan/parallel.h>
 * @brief Compute splitters for a sequence of objects.
 *
 * @signature void computeSplitters(splitters, size, count);
 *
 * @param[out] splitters Resulting splitters, will be resized to contain <tt>count + 1</tt> elements, e.g. an
 *                       @link AllocString @endlink of integers.
 * @param[in]  size     The number of objects to split.
 * @param[in]  count     The number of chunks.
 *
 * @section Remarks
 *
 * The first <tt>count - 1</tt> chunks will have the size <tt>ceil(size / count)</tt>, the last chunk will contain the
 * rest.
 *
 * @section Examples
 *
 * Most simple case for splitting.
 *
 * @code{.cpp}
 * String<unsigned> splitters;
 * computeSplitters(splitters, 10, 5);
 * // splitters == {0, 5, 10}
 * @endcode
 *
 * In this case, the last chunks will stay empty.
 *
 * @code{.cpp}
 * computeSplitters(splitters, 3, 5);
 * // splitters == {0, 1, 2, 3, 3, 3}
 * @endcode
 */

template <typename TPosString, typename TSize, typename TCount>
void computeSplitters(TPosString & splitters, TSize size, TCount count)
{
    typedef typename Value<TPosString>::Type TPos;

    SEQAN_ASSERT_GEQ(count, (TCount)0);

    resize(splitters, count + 1);

    TSize blockLength = (size / count) + 1;
    TCount rest = size % count;
    TPos pos = 0;

    TCount i = 0;

    // the first (size % count) many blocks have length (size / count) + 1
    for (; i < rest; ++i, pos += blockLength)
        splitters[i] = pos;

    // the remaining blocks have length (size / count)
    --blockLength;
    for (; i <= count; ++i, pos += blockLength)
        splitters[i] = pos;

    SEQAN_ASSERT_EQ(back(splitters), static_cast<TPos>(size));
}

}  // namespace seqan

#endif  // SEQAN_PARALLEL_PARALLEL_SPLITTING_H_