File: aligner_sw.h

package info (click to toggle)
hisat2 2.1.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 13,756 kB
  • sloc: cpp: 86,309; python: 12,230; sh: 2,171; perl: 936; makefile: 375
file content (648 lines) | stat: -rw-r--r-- 25,468 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
/*
 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
 *
 * This file is part of Bowtie 2.
 *
 * Bowtie 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Bowtie 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * aligner_sw.h
 *
 * Classes and routines for solving dynamic programming problems in aid of read
 * alignment.  Goals include the ability to handle:
 *
 * - Both read alignment, where the query must align end-to-end, and local
 *   alignment, where we seek a high-scoring alignment that need not involve
 *   the entire query.
 * - Situations where: (a) we've found a seed hit and are trying to extend it
 *   into a larger hit, (b) we've found an alignment for one mate of a pair and
 *   are trying to find a nearby alignment for the other mate, (c) we're
 *   aligning against an entire reference sequence.
 * - Caller-specified indicators for what columns of the dynamic programming
 *   matrix we are allowed to start in or end in.
 *
 * TODO:
 *
 * - A slicker way to filter out alignments that violate a ceiling placed on
 *   the number of Ns permitted in the reference portion of the alignment.
 *   Right now we accomplish this by masking out ending columns that correspond
 *   to *ungapped* alignments with too many Ns.  This results in false
 *   positives and false negatives for gapped alignments.  The margin of error
 *   (# of Ns by which we might miscount) is bounded by the number of gaps.
 */

/**
 *  |-maxgaps-|
 *  ***********oooooooooooooooooooooo    -
 *   ***********ooooooooooooooooooooo    |
 *    ***********oooooooooooooooooooo    |
 *     ***********ooooooooooooooooooo    |
 *      ***********oooooooooooooooooo    |
 *       ***********ooooooooooooooooo read len
 *        ***********oooooooooooooooo    |
 *         ***********ooooooooooooooo    |
 *          ***********oooooooooooooo    |
 *           ***********ooooooooooooo    |
 *            ***********oooooooooooo    -
 *            |-maxgaps-|
 *  |-readlen-|
 *  |-------skip--------|
 */

#ifndef ALIGNER_SW_H_
#define ALIGNER_SW_H_

#define INLINE_CUPS

#include <stdint.h>
#include <iostream>
#include <limits>
#include "threading.h"
#include <emmintrin.h>
#include "aligner_sw_common.h"
#include "aligner_sw_nuc.h"
#include "ds.h"
#include "aligner_seed.h"
#include "reference.h"
#include "random_source.h"
#include "mem_ids.h"
#include "aligner_result.h"
#include "mask.h"
#include "dp_framer.h"
#include "aligner_swsse.h"
#include "aligner_bt.h"

#define QUAL2(d, f) sc_->mm((int)(*rd_)[rdi_ + d], \
							(int)  rf_ [rfi_ + f], \
							(int)(*qu_)[rdi_ + d] - 33)
#define QUAL(d)     sc_->mm((int)(*rd_)[rdi_ + d], \
							(int)(*qu_)[rdi_ + d] - 33)
#define N_SNP_PEN(c) (((int)rf_[rfi_ + c] > 15) ? sc_->n(30) : sc_->penSnp)

/**
 * SwAligner
 * =========
 *
 * Ensapsulates facilities for alignment using dynamic programming.  Handles
 * alignment of nucleotide reads against known reference nucleotides.
 *
 * The class is stateful.  First the user must call init() to initialize the
 * object with details regarding the dynamic programming problem to be solved.
 * Next, the user calls align() to fill the dynamic programming matrix and
 * calculate summaries describing the solutions.  Finally the user calls 
 * nextAlignment(...), perhaps repeatedly, to populate the SwResult object with
 * the next result.  Results are dispensend in best-to-worst, left-to-right
 * order.
 *
 * The class expects the read string, quality string, and reference string
 * provided by the caller live at least until the user is finished aligning and
 * obtaining alignments from this object.
 *
 * There is a design tradeoff between hiding/exposing details of the genome and
 * its strands to the SwAligner.  In a sense, a better design is to hide
 * details such as the id of the reference sequence aligned to, or whether
 * we're aligning the read in its original forward orientation or its reverse
 * complement.  But this means that any alignment results returned by SwAligner
 * have to be extended to include those details before they're useful to the
 * caller.  We opt for messy but expedient - the reference id and orientation
 * of the read are given to SwAligner, remembered, and used to populate
 * SwResults.
 *
 * LOCAL VS GLOBAL
 *
 * The dynamic programming aligner supports both local and global alignment,
 * and one option in between.  To implement global alignment, the aligner (a)
 * allows negative scores (i.e. doesn't necessarily clamp them up to 0), (b)
 * checks in rows other than the last row for acceptable solutions, and (c)
 * optionally adds a bonus to the score for matches.
 * 
 * For global alignment, we:
 *
 * (a) Allow negative scores
 * (b) Check only in the last row
 * (c) Either add a bonus for matches or not (doesn't matter)
 *
 * For local alignment, we:
 *
 * (a) Clamp scores to 0
 * (b) Check in any row for a sufficiently high score
 * (c) Add a bonus for matches
 *
 * An in-between solution is to allow alignments to be curtailed on the
 * right-hand side if a better score can be achieved thereby, but not on the
 * left.  For this, we:
 *
 * (a) Allow negative scores
 * (b) Check in any row for a sufficiently high score
 * (c) Either add a bonus for matches or not (doesn't matter)
 *
 * REDUNDANT ALIGNMENTS
 *
 * When are two alignments distinct and when are they redundant (not distinct)?
 * At one extreme, we might say the best alignment from any given dynamic
 * programming problem is redundant with all other alignments from that
 # problem.  At the other extreme, we might say that any two alignments with
 * distinct starting points and edits are distinct.  The former is probably too
 * conservative for mate-finding DP problems.  The latter is certainly too
 * permissive, since two alignments that differ only in how gaps are arranged
 * should not be considered distinct.
 *
 * Some in-between solutions are:
 *
 * (a) If two alignments share an end point on either end, they are redundant.
 *     Otherwise, they are distinct.
 * (b) If two alignments share *both* end points, they are redundant.
 * (c) If two alignments share any cells in the DP table, they are redundant.
 * (d) 2 alignments are redundant if either end within N poss of each other
 * (e) Like (d) but both instead of either
 * (f, g) Like d, e, but where N is tied to maxgaps somehow
 *
 * Why not (a)?  One reason is that it's possible for two alignments to have
 * different start & end positions but share many cells.  Consider alignments 1
 * and 2 below; their end-points are labeled.
 *
 *  1 2
 *  \ \
 *    -\
 *      \
 *       \
 *        \
 *        -\
 *        \ \
 *        1 2
 *
 * 1 and 2 are distinct according to (a) but they share many cells in common.
 *
 * Why not (f, g)?  It fixes the problem with (a) above by forcing the
 * alignments to be spread so far that they can't possibly share diagonal cells
 * in common
 */
class SwAligner {

	typedef std::pair<size_t, size_t> SizeTPair;

	// States that the aligner can be in
	enum {
		STATE_UNINIT,  // init() hasn't been called yet
		STATE_INITED,  // init() has been called, but not align()
		STATE_ALIGNED, // align() has been called
	};
	
	const static size_t ALPHA_SIZE = 5;

public:

	explicit SwAligner() :
		sseU8fw_(DP_CAT),
		sseU8rc_(DP_CAT),
		sseI16fw_(DP_CAT),
		sseI16rc_(DP_CAT),
		state_(STATE_UNINIT),
		initedRead_(false),
		readSse16_(false),
		initedRef_(false),
		rfwbuf_(DP_CAT),
		btnstack_(DP_CAT),
		btcells_(DP_CAT),
		btdiag_(),
		btncand_(DP_CAT),
		btncanddone_(DP_CAT),
		btncanddoneSucc_(0),
		btncanddoneFail_(0),
		cper_(),
		cperMinlen_(),
		cperPerPow2_(),
		cperEf_(),
		cperTri_(),
		colstop_(0),
		lastsolcol_(0),
		cural_(0)
		ASSERT_ONLY(, cand_tmp_(DP_CAT))
	{ }

	/**
	 * Prepare the dynamic programming driver with a new read and a new scoring
	 * scheme.
	 */
	void initRead(
		const BTDnaString& rdfw, // read sequence for fw read
		const BTDnaString& rdrc, // read sequence for rc read
		const BTString& qufw,    // read qualities for fw read
		const BTString& qurc,    // read qualities for rc read
		size_t rdi,              // offset of first read char to align
		size_t rdf,              // offset of last read char to align
		const Scoring& sc);      // scoring scheme
	
	/**
	 * Initialize with a new alignment problem.
	 */
	void initRef(
		bool fw,               // whether to forward or revcomp read is aligning
		TRefId refidx,         // id of reference aligned against
		const DPRect& rect,    // DP rectangle
		char *rf,              // reference sequence
		size_t rfi,            // offset of first reference char to align to
		size_t rff,            // offset of last reference char to align to
		TRefOff reflen,        // length of reference sequence
		const Scoring& sc,     // scoring scheme
		TAlScore minsc,        // minimum score
		bool enable8,          // use 8-bit SSE if possible?
		size_t cminlen,        // minimum length for using checkpointing scheme
		size_t cpow2,          // interval b/t checkpointed diags; 1 << this
		bool doTri,            // triangular mini-fills?
		bool extend);          // true iff this is a seed extension

	/**
	 * Given a read, an alignment orientation, a range of characters in a
	 * referece sequence, and a bit-encoded version of the reference,
	 * execute the corresponding dynamic programming problem.
	 *
	 * Here we expect that the caller has already narrowed down the relevant
	 * portion of the reference (e.g. using a seed hit) and all we do is
	 * banded dynamic programming in the vicinity of that portion.  This is not
	 * the function to call if we are trying to solve the whole alignment
	 * problem with dynamic programming (that is TODO).
	 *
	 * Returns true if an alignment was found, false otherwise.
	 */
	void initRef(
		bool fw,               // whether to forward or revcomp read aligned
		TRefId refidx,         // reference aligned against
		const DPRect& rect,    // DP rectangle
		const BitPairReference& refs, // Reference strings
		TRefOff reflen,        // length of reference sequence
		const Scoring& sc,     // scoring scheme
		TAlScore minsc,        // minimum alignment score
		bool enable8,          // use 8-bit SSE if possible?
		size_t cminlen,        // minimum length for using checkpointing scheme
		size_t cpow2,          // interval b/t checkpointed diags; 1 << this
		bool doTri,            // triangular mini-fills?
		bool extend,           // true iff this is a seed extension
		size_t  upto,          // count the number of Ns up to this offset
		size_t& nsUpto);       // output: the number of Ns up to 'upto'

	/**
	 * Given a read, an alignment orientation, a range of characters in a
	 * referece sequence, and a bit-encoded version of the reference, set up
	 * and execute the corresponding ungapped alignment problem.  There can
	 * only be one solution.
	 *
	 * The caller has already narrowed down the relevant portion of the
	 * reference using, e.g., the location of a seed hit, or the range of
	 * possible fragment lengths if we're searching for the opposite mate in a
	 * pair.
	 */
	int ungappedAlign(
		const BTDnaString&      rd,     // read sequence (could be RC)
		const BTString&         qu,     // qual sequence (could be rev)
		const Coord&            coord,  // coordinate aligned to
		const BitPairReference& refs,   // Reference strings
		size_t                  reflen, // length of reference sequence
		const Scoring&          sc,     // scoring scheme
		bool                    ohang,  // allow overhang?
		TAlScore                minsc,  // minimum score
		SwResult&               res);   // put alignment result here

	/**
	 * Align read 'rd' to reference using read & reference information given
	 * last time init() was called.  Uses dynamic programming.
	 */
	bool align(RandomSource& rnd, TAlScore& best);
	
	/**
	 * Populate the given SwResult with information about the "next best"
	 * alignment if there is one.  If there isn't one, false is returned.  Note
	 * that false might be returned even though a call to done() would have
	 * returned false.
	 */
	bool nextAlignment(
		SwResult& res,
		TAlScore minsc,
		RandomSource& rnd);
	
	/**
	 * Print out an alignment result as an ASCII DP table.
	 */
	void printResultStacked(
		const SwResult& res,
		std::ostream& os)
	{
		res.alres.printStacked(*rd_, os);
	}
	
	/**
	 * Return true iff there are no more solution cells to backtace from.
	 * Note that this may return false in situations where there are actually
	 * no more solutions, but that hasn't been discovered yet.
	 */
	bool done() const {
		assert(initedRead() && initedRef());
		return cural_ == btncand_.size();
	}

	/**
	 * Return true iff this SwAligner has been initialized with a read to align.
	 */
	inline bool initedRef() const { return initedRef_; }

	/**
	 * Return true iff this SwAligner has been initialized with a reference to
	 * align against.
	 */
	inline bool initedRead() const { return initedRead_; }
	
	/**
	 * Reset, signaling that we're done with this dynamic programming problem
	 * and won't be asking for any more alignments.
	 */
	inline void reset() { initedRef_ = initedRead_ = false; }

#ifndef NDEBUG
	/**
	 * Check that aligner is internally consistent.
	 */
	bool repOk() const {
		assert_gt(dpRows(), 0);
		// Check btncand_
		for(size_t i = 0; i < btncand_.size(); i++) {
			assert(btncand_[i].repOk());
			assert_geq(btncand_[i].score, minsc_);
		}
		return true;
	}
#endif
	
	/**
	 * Return the number of alignments given out so far by nextAlignment().
	 */
	size_t numAlignmentsReported() const { return cural_; }

	/**
	 * Merge tallies in the counters related to filling the DP table.
	 */
	void merge(
		SSEMetrics& sseU8ExtendMet,
		SSEMetrics& sseU8MateMet,
		SSEMetrics& sseI16ExtendMet,
		SSEMetrics& sseI16MateMet,
		uint64_t&   nbtfiltst,
		uint64_t&   nbtfiltsc,
		uint64_t&   nbtfiltdo)
	{
		sseU8ExtendMet.merge(sseU8ExtendMet_);
		sseU8MateMet.merge(sseU8MateMet_);
		sseI16ExtendMet.merge(sseI16ExtendMet_);
		sseI16MateMet.merge(sseI16MateMet_);
		nbtfiltst += nbtfiltst_;
		nbtfiltsc += nbtfiltsc_;
		nbtfiltdo += nbtfiltdo_;
	}
	
	/**
	 * Reset all the counters related to filling in the DP table to 0.
	 */
	void resetCounters() {
		sseU8ExtendMet_.reset();
		sseU8MateMet_.reset();
		sseI16ExtendMet_.reset();
		sseI16MateMet_.reset();
		nbtfiltst_ = nbtfiltsc_ = nbtfiltdo_ = 0;
	}
	
	/**
	 * Return the size of the DP problem.
	 */
	size_t size() const {
		return dpRows() * (rff_ - rfi_);
	}

protected:
	
	/**
	 * Return the number of rows that will be in the dynamic programming table.
	 */
	inline size_t dpRows() const {
		assert(initedRead_);
		return rdf_ - rdi_;
	}

	/**
	 * Align nucleotides from read 'rd' to the reference string 'rf' using
	 * vector instructions.  Return the score of the best alignment found, or
	 * the minimum integer if an alignment could not be found.  Flag is set to
	 * 0 if an alignment is found, -1 if no valid alignment is found, or -2 if
	 * the score saturated at any point during alignment.
	 */
	TAlScore alignNucleotidesEnd2EndSseU8(  // unsigned 8-bit elements
		int& flag, bool debug);
	TAlScore alignNucleotidesLocalSseU8(    // unsigned 8-bit elements
		int& flag, bool debug);
	TAlScore alignNucleotidesEnd2EndSseI16( // signed 16-bit elements
		int& flag, bool debug);
	TAlScore alignNucleotidesLocalSseI16(   // signed 16-bit elements
		int& flag, bool debug);
	
	/**
	 * Aligns by filling a dynamic programming matrix with the SSE-accelerated,
	 * banded DP approach of Farrar.  As it goes, it determines which cells we
	 * might backtrace from and tallies the best (highest-scoring) N backtrace
	 * candidate cells per diagonal.  Also returns the alignment score of the best
	 * alignment in the matrix.
	 *
	 * This routine does *not* maintain a matrix holding the entire matrix worth of
	 * scores, nor does it maintain any other dense O(mn) data structure, as this
	 * would quickly exhaust memory for queries longer than about 10,000 kb.
	 * Instead, in the fill stage it maintains two columns worth of scores at a
	 * time (current/previous, or right/left) - these take O(m) space.  When
	 * finished with the current column, it determines which cells from the
	 * previous column, if any, are candidates we might backtrace from to find a
	 * full alignment.  A candidate cell has a score that rises above the threshold
	 * and isn't improved upon by a match in the next column.  The best N
	 * candidates per diagonal are stored in a O(m + n) data structure.
	 */
	TAlScore alignGatherEE8(                // unsigned 8-bit elements
		int& flag, bool debug);
	TAlScore alignGatherLoc8(               // unsigned 8-bit elements
		int& flag, bool debug);
	TAlScore alignGatherEE16(               // signed 16-bit elements
		int& flag, bool debug);
	TAlScore alignGatherLoc16(              // signed 16-bit elements
		int& flag, bool debug);
	
	/**
	 * Build query profile look up tables for the read.  The query profile look
	 * up table is organized as a 1D array indexed by [i][j] where i is the
	 * reference character in the current DP column (0=A, 1=C, etc), and j is
	 * the segment of the query we're currently working on.
	 */
	void buildQueryProfileEnd2EndSseU8(bool fw);
	void buildQueryProfileLocalSseU8(bool fw);

	/**
	 * Build query profile look up tables for the read.  The query profile look
	 * up table is organized as a 1D array indexed by [i][j] where i is the
	 * reference character in the current DP column (0=A, 1=C, etc), and j is
	 * the segment of the query we're currently working on.
	 */
	void buildQueryProfileEnd2EndSseI16(bool fw);
	void buildQueryProfileLocalSseI16(bool fw);
	
	bool gatherCellsNucleotidesLocalSseU8(TAlScore best);
	bool gatherCellsNucleotidesEnd2EndSseU8(TAlScore best);

	bool gatherCellsNucleotidesLocalSseI16(TAlScore best);
	bool gatherCellsNucleotidesEnd2EndSseI16(TAlScore best);

	bool backtraceNucleotidesLocalSseU8(
		TAlScore       escore, // in: expected score
		SwResult&      res,    // out: store results (edits and scores) here
		size_t&        off,    // out: store diagonal projection of origin
		size_t&        nbts,   // out: # backtracks
		size_t         row,    // start in this rectangle row
		size_t         col,    // start in this rectangle column
		RandomSource&  rand);  // random gen, to choose among equal paths

	bool backtraceNucleotidesLocalSseI16(
		TAlScore       escore, // in: expected score
		SwResult&      res,    // out: store results (edits and scores) here
		size_t&        off,    // out: store diagonal projection of origin
		size_t&        nbts,   // out: # backtracks
		size_t         row,    // start in this rectangle row
		size_t         col,    // start in this rectangle column
		RandomSource&  rand);  // random gen, to choose among equal paths

	bool backtraceNucleotidesEnd2EndSseU8(
		TAlScore       escore, // in: expected score
		SwResult&      res,    // out: store results (edits and scores) here
		size_t&        off,    // out: store diagonal projection of origin
		size_t&        nbts,   // out: # backtracks
		size_t         row,    // start in this rectangle row
		size_t         col,    // start in this rectangle column
		RandomSource&  rand);  // random gen, to choose among equal paths

	bool backtraceNucleotidesEnd2EndSseI16(
		TAlScore       escore, // in: expected score
		SwResult&      res,    // out: store results (edits and scores) here
		size_t&        off,    // out: store diagonal projection of origin
		size_t&        nbts,   // out: # backtracks
		size_t         row,    // start in this rectangle row
		size_t         col,    // start in this rectangle column
		RandomSource&  rand);  // random gen, to choose among equal paths

	bool backtrace(
		TAlScore       escore, // in: expected score
		bool           fill,   // in: use mini-fill?
		bool           usecp,  // in: use checkpoints?
		SwResult&      res,    // out: store results (edits and scores) here
		size_t&        off,    // out: store diagonal projection of origin
		size_t         row,    // start in this rectangle row
		size_t         col,    // start in this rectangle column
		size_t         maxiter,// max # extensions to try
		size_t&        niter,  // # extensions tried
		RandomSource&  rnd)    // random gen, to choose among equal paths
	{
		bter_.initBt(
			escore,              // in: alignment score
			row,                 // in: start in this row
			col,                 // in: start in this column
			fill,                // in: use mini-fill?
			usecp,               // in: use checkpoints?
			cperTri_,            // in: triangle-shaped mini-fills?
			rnd);                // in: random gen, to choose among equal paths
		assert(bter_.inited());
		size_t nrej = 0;
		if(bter_.emptySolution()) {
			return false;
		} else {
			return bter_.nextAlignment(maxiter, res, off, nrej, niter, rnd);
		}
	}

	const BTDnaString  *rd_;     // read sequence
	const BTString     *qu_;     // read qualities
	const BTDnaString  *rdfw_;   // read sequence for fw read
	const BTDnaString  *rdrc_;   // read sequence for rc read
	const BTString     *qufw_;   // read qualities for fw read
	const BTString     *qurc_;   // read qualities for rc read
	TReadOff            rdi_;    // offset of first read char to align
	TReadOff            rdf_;    // offset of last read char to align
	bool                fw_;     // true iff read sequence is original fw read
	TRefId              refidx_; // id of reference aligned against
	TRefOff             reflen_; // length of entire reference sequence
	const DPRect*       rect_;   // DP rectangle
	char               *rf_;     // reference sequence
	TRefOff             rfi_;    // offset of first ref char to align to
	TRefOff             rff_;    // offset of last ref char to align to (excl)
	size_t              rdgap_;  // max # gaps in read
	size_t              rfgap_;  // max # gaps in reference
	bool                enable8_;// enable 8-bit sse
	bool                extend_; // true iff this is a seed-extend problem
	const Scoring      *sc_;     // penalties for edit types
	TAlScore            minsc_;  // penalty ceiling for valid alignments
	int                 nceil_;  // max # Ns allowed in ref portion of aln

	bool                sse8succ_;  // whether 8-bit worked
	bool                sse16succ_; // whether 16-bit worked
	SSEData             sseU8fw_;   // buf for fw query, 8-bit score
	SSEData             sseU8rc_;   // buf for rc query, 8-bit score
	SSEData             sseI16fw_;  // buf for fw query, 16-bit score
	SSEData             sseI16rc_;  // buf for rc query, 16-bit score
	bool                sseU8fwBuilt_;   // built fw query profile, 8-bit score
	bool                sseU8rcBuilt_;   // built rc query profile, 8-bit score
	bool                sseI16fwBuilt_;  // built fw query profile, 16-bit score
	bool                sseI16rcBuilt_;  // built rc query profile, 16-bit score

	SSEMetrics			sseU8ExtendMet_;
	SSEMetrics			sseU8MateMet_;
	SSEMetrics			sseI16ExtendMet_;
	SSEMetrics			sseI16MateMet_;

	int                 state_;        // state
	bool                initedRead_;   // true iff initialized with initRead
	bool                readSse16_;    // true -> sse16 from now on for read
	bool                initedRef_;    // true iff initialized with initRef
	EList<uint32_t>     rfwbuf_;       // buffer for wordized ref stretches
	
	EList<DpNucFrame>    btnstack_;    // backtrace stack for nucleotides
	EList<SizeTPair>     btcells_;     // cells involved in current backtrace

	NBest<DpBtCandidate> btdiag_;      // per-diagonal backtrace candidates
	EList<DpBtCandidate> btncand_;     // cells we might backtrace from
	EList<DpBtCandidate> btncanddone_; // candidates that we investigated
	size_t              btncanddoneSucc_; // # investigated and succeeded
	size_t              btncanddoneFail_; // # investigated and failed
	
	BtBranchTracer       bter_;        // backtracer
	
	Checkpointer         cper_;        // structure for saving checkpoint cells
	size_t               cperMinlen_;  // minimum length for using checkpointer
	size_t               cperPerPow2_; // checkpoint every 1 << perpow2 diags (& next)
	bool                 cperEf_;      // store E and F in addition to H?
	bool                 cperTri_;     // checkpoint for triangular mini-fills?
	
	size_t              colstop_;      // bailed on DP loop after this many cols
	size_t              lastsolcol_;   // last DP col with valid cell
	size_t              cural_;        // index of next alignment to be given
	
	uint64_t nbtfiltst_; // # candidates filtered b/c starting cell was seen
	uint64_t nbtfiltsc_; // # candidates filtered b/c score uninteresting
	uint64_t nbtfiltdo_; // # candidates filtered b/c dominated by other cell
	
	ASSERT_ONLY(SStringExpandable<uint32_t> tmp_destU32_);
	ASSERT_ONLY(BTDnaString tmp_editstr_, tmp_refstr_);
	ASSERT_ONLY(EList<DpBtCandidate> cand_tmp_);
};

#endif /*ALIGNER_SW_H_*/