File: group_walk.h

package info (click to toggle)
centrifuge 1.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 11,864 kB
  • sloc: cpp: 51,936; perl: 1,919; python: 1,538; makefile: 618; sh: 352
file content (1285 lines) | stat: -rw-r--r-- 37,406 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
/*
 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
 *
 * This file is part of Bowtie 2.
 *
 * Bowtie 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Bowtie 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * group_walk.h
 *
 * Classes and routines for walking a set of BW ranges backwards from the edge
 * of a seed hit with the goal of resolving the offset of each row in each
 * range.  Here "offset" means offset into the concatenated string of all
 * references.  The main class is 'GroupWalk' and an important helper is
 * 'GWState'.
 *
 * For each combination of seed offset and orientation, there is an associated
 * QVal.  Each QVal describes a (possibly empty) set of suffix array ranges.
 * Call these "seed range sets."  Each range in the set is "backed" by a range
 * of the salist, represented as a PListSlice. Such a range is the origin of a
 * walk.
 *
 * When an offset is resolved, it is entered into the salist via the
 * PListSlice.  Note that other routines in this same thread might also be
 * setting elements of the salist, so routines here should expect that elements
 * can go from unresolved to resolved at any time.
 *
 * What bookkeeping do we have to do as we walk?  Before the first step, we
 * convert the initial QVal into a list of SATuples; the SATuples are our link
 * to the correpsonding ranges in the suffix array.  The list of SATuples is
 * then converted to a list of GWState objects; these keep track of where we
 * are in our walk (e.g. what 'top' and 'bot' are, how many steps have we gone,
 * etc) as well as how the elements in the current range correspond to elements
 * from the original range.
 *
 * The user asks the GroupWalk to resolve another offset by calling advance().
 * advance() can be called in various ways:
 *
 * (a) The user can request that the GroupWalk proceed until a
 *     *particular* element is resolved, then return that resolved
 *     element.  Other elements may be resolved along the way, but
 *     those results are buffered and may be dispensed in future calls
 *     to advance().
 *
 * (b) The user can request that the GroupWalk select an as-yet-
 *     unreported element at random and and proceed until that element
 *     is resolved and report it.  Again, other elements may be
 *     resolved along the way but they are buffered.
 *
 * (c) The user can request that the GroupWalk resolve elements in a
 *     particular BW range (with a particular offset and orientation)
 *     in an order of its choosing.  The GroupWalk in this case
 *     attempts to resolve as many offsets as possible as quickly as
 *     possible, and returns them as soon as they're found.  The res_
 *     buffer is used in this case.
 *
 * (d) Like (c) but resolving elements at a paritcular offset and
 *     orientation instead of at a specific BW range.  The res_ buffer
 *     is used in this case, since there's a chance that the 
 *
 * There are simple ways to heuristically reduce the problem size while
 * maintaining randomness.  For instance, the user put a ceiling on the
 * number of elements that we walk from any given seed offset or range.
 * We can then trim away random subranges to reduce the size of the
 * problem.  There is no need for the caller to do this for us.
 */

#ifndef GROUP_WALK_H_
#define GROUP_WALK_H_

#include <stdint.h>
#include <limits>
#include "ds.h"
#include "bt2_idx.h"
#include "read.h"
#include "reference.h"
#include "mem_ids.h"

/**
 * Encapsulate an SA range and an associated list of slots where the resolved
 * offsets can be placed.
 */
template<typename T>
class SARangeWithOffs {

public:

	SARangeWithOffs() { reset(); };

	SARangeWithOffs(TIndexOffU tf, size_t len, const T& o) {
		init(tf, len, o);
	}
	
	void init(TIndexOffU tf, size_t len_, const T& o) {
		topf = tf; len = len_, offs = o;
	}

	/**
	 * Reset to uninitialized state.
	 */
	void reset() { topf = std::numeric_limits<TIndexOffU>::max(); }
	
	/**
	 * Return true if this is initialized.
	 */
	bool inited() const {
		return topf != std::numeric_limits<TIndexOffU>::max();
	}
	
	/**
	 * Return the number of times this reference substring occurs in the
	 * reference, which is also the size of the 'offs' TSlice.
	 */
	size_t size() const { return offs.size(); }

	TIndexOffU topf; // top in BWT index
	size_t    len;  // length of the reference sequence involved
	T         offs; // offsets
};

/**
 * A group of per-thread state that can be shared between all the GroupWalks
 * used in that thread.
 */
template <typename index_t>
struct GroupWalkState {

	GroupWalkState(int cat) : map(cat) {
		masks[0].setCat(cat);
		masks[1].setCat(cat);
		masks[2].setCat(cat);
		masks[3].setCat(cat);
	}

	EList<bool> masks[4];      // temporary list for masks; used in GWState
	EList<index_t, 16> map;   // temporary list of GWState maps
};

/**
 * Encapsulates counters that encode how much work the walk-left logic
 * has done.
 */
struct WalkMetrics {

	WalkMetrics() {
	    reset();
	}

	/**
	 * Sum each across this object and 'm'.  This is the only safe way
	 * to update a WalkMetrics shared by many threads.
	 */
	void merge(const WalkMetrics& m, bool getLock = false) {
		ThreadSafe ts(&mutex_m, getLock);
		bwops += m.bwops;
		branches += m.branches;
		resolves += m.resolves;
		refresolves += m.refresolves;
		reports += m.reports;
	}
	
	/**
	 * Set all to 0.
	 */
	void reset() {
		bwops = branches = resolves = refresolves = reports = 0;
	}

	uint64_t bwops;       // Burrows-Wheeler operations
	uint64_t branches;    // BW range branch-offs
	uint64_t resolves;    // # offs resolved with BW walk-left
	uint64_t refresolves; // # resolutions caused by reference scanning
	uint64_t reports;     // # offs reported (1 can be reported many times)
	MUTEX_T mutex_m;
};

/**
 * Coordinates for a BW element that the GroupWalk might resolve.
 */
template <typename index_t>
struct GWElt {

	GWElt() { reset(); }
	
	/**
	 * Reset GWElt to uninitialized state.
	 */
	void reset() {
		offidx = range = elt = len = (index_t)0xffffffff;
		fw = false;
	}

	/**
	 * Initialize this WalkResult.
	 */
	void init(
		index_t oi,
		bool f,
		index_t r,
		index_t e,
		index_t l)
	{
		offidx = oi;
		fw = f;
		range = r;
		elt = e;
		len = l;
	}

	/**
	 * Return true iff this GWElt and the given GWElt refer to the same
	 * element.
	 */
	bool operator==(const GWElt& o) const {
		return offidx == o.offidx &&
		       fw == o.fw &&
		       range == o.range &&
		       elt == o.elt &&
		       len == o.len;
	}
	
	/**
	 * Return true iff this GWElt and the given GWElt refer to
	 * different elements.
	 */
	bool operator!=(const GWElt& o) const {
		return !(*this == o);
	}

	index_t offidx; // seed offset index
	bool    fw;     // strand
	index_t range;  // range
	index_t elt;    // element
	index_t len;    // length
};

/**
 * A record encapsulating the result of looking up one BW element in
 * the Bowtie index.
 */
template <typename index_t>
struct WalkResult {

	WalkResult() { reset(); }
	
	/**
	 * Reset GWElt to uninitialized state.
	 */
	void reset() {
		elt.reset();
		bwrow = toff = (index_t)OFF_MASK;
	}

	/**
	 * Initialize this WalkResult.
	 */
	void init(
		index_t oi,  // seed offset index
		bool f,       // strand
		index_t r,   // range
		index_t e,   // element
		index_t bwr, // BW row
		index_t len, // length
		index_t to)  // text offset
	{
		elt.init(oi, f, r, e, len);
		bwrow = bwr;
		toff = to;
	}

	GWElt<index_t> elt;   // element resolved
	index_t        bwrow; // SA row resolved
	index_t        toff;  // resolved offset from SA sample
};

/**
 * A GW hit encapsulates an SATuple describing a reference substring
 * in the cache, along with a bool indicating whether each element of
 * the hit has been reported yet.
 */
template<typename index_t, typename T>
class GWHit {

public:
	GWHit() :
		fmap(0, GW_CAT),
		offidx((index_t)OFF_MASK),
		fw(false),
		range((index_t)OFF_MASK),
		len((index_t)OFF_MASK),
		reported_(0, GW_CAT),
		nrep_(0)
	{
		assert(repOkBasic());
	}

	/**
	 * Initialize with a new SA range.  Resolve the done vector so that
	 * there's one bool per suffix array element.
	 */
	void init(
		SARangeWithOffs<T>& sa,
		index_t oi,
		bool f,
		index_t r)
	{
		nrep_ = 0;
		offidx = oi;
		fw = f;
		range = r;
		len = (index_t)sa.len;
		reported_.resize(sa.offs.size());
		reported_.fill(false);
		fmap.resize(sa.offs.size());
		fmap.fill(make_pair((index_t)OFF_MASK, (index_t)OFF_MASK));
	}
	
	/**
	 * Clear contents of sat and done.
	 */
	void reset() {
		reported_.clear();
		fmap.clear();
		nrep_ = 0;
		offidx = (index_t)OFF_MASK;
		fw = false;
		range = (index_t)OFF_MASK;
		len = (index_t)OFF_MASK;
	}
	
#ifndef NDEBUG
	/**
	 * Check that GWHit is internally consistent.  If a pointer to an
	 * EList of GWStates is given, we assume that it is the EList
	 * corresponding to this GWHit and check whether the forward and
	 * reverse mappings match up for the as-yet-unresolved elements.
	 */
	bool repOk(const SARangeWithOffs<T>& sa) const {
		assert_eq(reported_.size(), sa.offs.size());
		assert_eq(fmap.size(), sa.offs.size());
		// Shouldn't be any repeats among as-yet-unresolveds
		size_t nrep = 0;
		for(size_t i = 0; i < fmap.size(); i++) {
			if(reported_[i]) nrep++;
			if(sa.offs[i] != (index_t)OFF_MASK) {
				continue;
			}
			for(size_t j = i+1; j < fmap.size(); j++) {
				if(sa.offs[j] != (index_t)OFF_MASK) {
					continue;
				}
				assert(fmap[i] != fmap[j]);
			}
		}
		assert_eq(nrep_, nrep);
		return true;
	}

	/**
	 * Return true iff this GWHit is not obviously corrupt.
	 */
	bool repOkBasic() {
		return true;
	}
#endif
	
	/**
	 * Set the ith element to be reported.
	 */
	void setReported(index_t i) {
		assert(!reported_[i]);
		assert_lt(i, reported_.size());
		reported_[i] = true;
		nrep_++;
	}
	
	/**
	 * Return true iff element i has been reported.
	 */
	bool reported(index_t i) const {
		assert_lt(i, reported_.size());
		return reported_[i];
	}
	
	/**
	 * Return true iff all elements have been reported.
	 */
	bool done() const {
		assert_leq(nrep_, reported_.size());
		return nrep_ == reported_.size();
	}

	EList<std::pair<index_t, index_t>, 16> fmap; // forward map; to GWState & elt
	index_t offidx; // offset idx
	bool fw;         // orientation
	index_t range;  // original range index
	index_t len;    // length of hit

protected:

	EList<bool, 16> reported_; // per-elt bool indicating whether it's been reported
	index_t nrep_;
};

/**
 * Encapsulates the progress made along a particular path from the original
 * range.
 */
template<typename index_t, typename T>
class GWState {
	
public:

	GWState() : map_(0, GW_CAT) {
		reset(); assert(repOkBasic());
	}
	
	/**
	 * Initialize this GWState with new ebwt, top, bot, step, and sat.
	 *
	 * We assume map is already set up.
	 *
	 * Returns true iff at least one elt was resolved.
	 */
	template<int S>
	pair<int, int> init(
		const Ebwt<index_t>& ebwt,    // index to walk left in
		const BitPairReference& ref,  // bitpair-encoded reference
		SARangeWithOffs<T>& sa,       // SA range with offsets
		EList<GWState, S>& sts,       // EList of GWStates for range being advanced
		GWHit<index_t, T>& hit,       // Corresponding hit structure
		index_t range,                // which range is this?
		bool reportList,              // if true, "report" resolved offsets immediately by adding them to 'res' list
		EList<WalkResult<index_t>, 16>* res,   // EList where resolved offsets should be appended
		index_t tp,                   // top of range at this step
		index_t bt,                   // bot of range at this step
		index_t st,                   // # steps taken to get to this step
		WalkMetrics& met)
	{
		assert_gt(bt, tp);
		assert_lt(range, sts.size());
		top = tp;
		bot = bt;
		step = (int)st;
		assert(!inited_);
		ASSERT_ONLY(inited_ = true);
		ASSERT_ONLY(lastStep_ = step-1);
		return init(ebwt, ref, sa, sts, hit, range, reportList, res, met);
	}

	/**
	 * Initialize this GWState.
	 *
	 * We assume map is already set up, and that 'step' is equal to the
	 * number of steps taken to get to the new top/bot pair *currently*
	 * in the top and bot fields.
	 *
	 * Returns a pair of numbers, the first being the number of
	 * resolved but unreported offsets found during this advance, the
	 * second being the number of as-yet-unresolved offsets.
	 */
	template<int S>
	pair<int, int> init(
		const Ebwt<index_t>& ebwt,    // forward Bowtie index
		const BitPairReference& ref,  // bitpair-encoded reference
		SARangeWithOffs<T>& sa,       // SA range with offsets
		EList<GWState, S>& st,        // EList of GWStates for advancing range
		GWHit<index_t, T>& hit,       // Corresponding hit structure
		index_t range,                // range being inited
		bool reportList,              // report resolutions, adding to 'res' list?
		EList<WalkResult<index_t>, 16>* res,   // EList to append resolutions
		WalkMetrics& met)             // update these metrics
	{
		assert(inited_);
		assert_eq(step, lastStep_+1);
		ASSERT_ONLY(lastStep_++);
		assert_leq((index_t)step, ebwt.eh().len());
		assert_lt(range, st.size());
		pair<int, int> ret = make_pair(0, 0);
		index_t trimBegin = 0, trimEnd = 0;
		bool empty = true; // assume all resolved until proven otherwise
		// Commit new information, if any, to the PListSlide.  Also,
		// trim and check if we're done.
		for(size_t i = mapi_; i < map_.size(); i++) {
			bool resolved = (off(i, sa) != (index_t)OFF_MASK);
			if(!resolved) {
				// Elt not resolved yet; try to resolve it now
				index_t bwrow = (index_t)(top - mapi_ + i);
				index_t toff = ebwt.tryOffset(bwrow);
				ASSERT_ONLY(index_t origBwRow = sa.topf + map(i));
				assert_eq(bwrow, ebwt.walkLeft(origBwRow, step));
				if(toff != (index_t)OFF_MASK) {
					// Yes, toff was resolvable
					assert_eq(toff, ebwt.getOffset(bwrow));
					met.resolves++;
#ifdef CENTRIFUGE
#else
					toff += step;
                    assert_eq(toff, ebwt.getOffset(origBwRow));
#endif
					setOff(i, toff, sa, met);
					if(!reportList) ret.first++;
#if 0
// used to be #ifndef NDEBUG, but since we no longer require that the reference
// string info be included, this is no longer relevant.

					// Sanity check that the reference characters under this
					// hit match the seed characters in hit.satup->key.seq.
					// This is NOT a check that we associated the exact right
					// text offset with the BW row.  This is an important
					// distinction because when resolved offsets are filled in
					// via refernce scanning, they are not necessarily the
					// exact right text offsets to associate with the
					// respective BW rows but they WILL all be correct w/r/t
					// the reference sequence underneath, which is what really
					// matters here.
					index_t tidx = (index_t)OFF_MASK, tof, tlen;
					bool straddled = false;
					ebwt.joinedToTextOff(
						hit.len, // length of seed
						toff,    // offset in joined reference string
						tidx,    // reference sequence id
						tof,     // offset in reference coordinates
						tlen,    // length of reference sequence
						true,    // don't reject straddlers
						straddled);
					if(tidx != (index_t)OFF_MASK &&
					   hit.satup->key.seq != std::numeric_limits<uint64_t>::max())
					{
						// key: 2-bit characters packed into a 64-bit word with
						// the least significant bitpair corresponding to the
						// rightmost character on the Watson reference strand.
						uint64_t key = hit.satup->key.seq;
						for(int64_t j = tof + hit.len-1; j >= tof; j--) {
							// Get next reference base to the left
							int c = ref.getBase(tidx, j);
							assert_range(0, 3, c);
							// Must equal least significant bitpair of key
							if(c != (int)(key & 3)) {
								// Oops; when we jump to the piece of the
								// reference where the seed hit is, it doesn't
								// match the seed hit.  Before dying, check
								// whether we have the right spot in the joined
								// reference string
								SString<char> jref;
								ebwt.restore(jref);
								uint64_t key2 = hit.satup->key.seq;
								for(int64_t k = toff + hit.len-1; k >= toff; k--) {
									int c = jref[k];
									assert_range(0, 3, c);
									assert_eq(c, (int)(key2 & 3));
									key2 >>= 2;
								}
								assert(false);
							}
							key >>= 2;
						}
					}
#endif
				}
			}
			// Is the element resolved?  We ask this regardless of how it was
			// resolved (whether this function did it just now, whether it did
			// it a while ago, or whether some other function outside GroupWalk
			// did it).
			if(off(i, sa) != (index_t)OFF_MASK) {
				if(reportList && !hit.reported(map(i))) {
					// Report it
					index_t toff = off(i, sa);
					assert(res != NULL);
					res->expand();
					index_t origBwRow = sa.topf + map(i);
					res->back().init(
						hit.offidx, // offset idx
						hit.fw,     // orientation
						hit.range,  // original range index
						map(i),     // original element offset
						origBwRow,  // BW row resolved
						hit.len,    // hit length
						toff);      // text offset
					hit.setReported(map(i));
					met.reports++;
				}
				// Offset resolved
				if(empty) {
					// Haven't seen a non-empty entry yet, so we
					// can trim this from the beginning.
					trimBegin++;
				} else {
					trimEnd++;
				}
			} else {
				// Offset not yet resolved
				ret.second++;
				trimEnd = 0;
				empty = false;
				// Set the forward map in the corresponding GWHit
				// object to point to the appropriate element of our
				// range
				assert_geq(i, mapi_);
				index_t bmap = map(i);
				hit.fmap[bmap].first = range;
				hit.fmap[bmap].second = (index_t)i;
#ifndef NDEBUG
				for(size_t j = 0; j < bmap; j++) {
					if(sa.offs[j] == (index_t)OFF_MASK &&
					   hit.fmap[j].first == range)
					{
						assert_neq(i, hit.fmap[j].second);
					}
				}
#endif
			}
		}
		// Trim from beginning
		assert_geq(trimBegin, 0);
		mapi_ += trimBegin;
		top += trimBegin;
		if(trimEnd > 0) {
			// Trim from end
			map_.resize(map_.size() - trimEnd);
			bot -= trimEnd;
		}
		if(empty) {
			assert(done());
#ifndef NDEBUG
			// If range is done, all elements from map should be
			// resolved
			for(size_t i = mapi_; i < map_.size(); i++) {
				assert_neq((index_t)OFF_MASK, off(i, sa));
			}
			// If this range is done, then it should be the case that
			// all elements in the corresponding GWHit that point to
			// this range are resolved.
			for(size_t i = 0; i < hit.fmap.size(); i++) {
				if(sa.offs[i] == (index_t)OFF_MASK) {
					assert_neq(range, hit.fmap[i].first);
				}
			}
#endif
			return ret;
		} else {
			assert(!done());
		}
		// Is there a dollar sign in the middle of the range?
		assert_neq(top, ebwt._zOff);
		assert_neq(bot-1, ebwt._zOff);
		if(ebwt._zOff > top && ebwt._zOff < bot-1) {
			// Yes, the dollar sign is in the middle of this range.  We
			// must split it into the two ranges on either side of the
			// dollar.  Let 'bot' and 'top' delimit the portion of the
			// range prior to the dollar.
			index_t oldbot = bot;
			bot = ebwt._zOff;
			// Note: might be able to do additional trimming off the
			// end.
			// Create a new range for the portion after the dollar.
			st.expand();
			st.back().reset();
			index_t ztop = ebwt._zOff+1;
			st.back().initMap(oldbot - ztop);
			assert_eq((index_t)map_.size(), oldbot-top+mapi_);
			for(index_t i = ztop; i < oldbot; i++) {
				st.back().map_[i - ztop] = map(i-top+mapi_);
			}
			map_.resize(bot - top + mapi_);
			st.back().init(
				ebwt,
				ref,
				sa,
				st,
				hit,
				(index_t)st.size()-1,
				reportList,
				res,
				ztop,
				oldbot,
				step,
				met);
		}
		assert_gt(bot, top);
		// Prepare SideLocus's for next step
		if(bot-top > 1) {
			SideLocus<index_t>::initFromTopBot(top, bot, ebwt.eh(), ebwt.ebwt(), tloc, bloc);
			assert(tloc.valid()); assert(tloc.repOk(ebwt.eh()));
			assert(bloc.valid()); assert(bloc.repOk(ebwt.eh()));
		} else {
			tloc.initFromRow(top, ebwt.eh(), ebwt.ebwt());
			assert(tloc.valid()); assert(tloc.repOk(ebwt.eh()));
			bloc.invalidate();
		}
		return ret;
	}
	
#ifndef NDEBUG
	/**
	 * Check if this GWP is internally consistent.
	 */
	bool repOk(
		const Ebwt<index_t>& ebwt,
		GWHit<index_t, T>& hit,
		index_t range) const
	{
		assert(done() || bot > top);
		assert(doneResolving(hit) || (tloc.valid() && tloc.repOk(ebwt.eh())));
		assert(doneResolving(hit) || bot == top+1 || (bloc.valid() && bloc.repOk(ebwt.eh())));
		assert_eq(map_.size()-mapi_, bot-top);
		// Make sure that 'done' is compatible with whether we have >=
		// 1 elements left to resolve.
		int left = 0;
		for(size_t i = mapi_; i < map_.size(); i++) {
			ASSERT_ONLY(index_t row = (index_t)(top + i - mapi_));
			ASSERT_ONLY(index_t origRow = hit.satup->topf + map(i));
			assert(step == 0 || row != origRow);
			assert_eq(row, ebwt.walkLeft(origRow, step));
			assert_lt(map_[i], hit.satup->offs.size());
			if(off(i, hit) == (index_t)OFF_MASK) left++;
		}
		assert(repOkMapRepeats());
		assert(repOkMapInclusive(hit, range));
		return true;
	}
	
	/**
	 * Return true iff this GWState is not obviously corrupt.
	 */
	bool repOkBasic() {
		assert_geq(bot, top);
		return true;
	}

	/**
	 * Check that the fmap elements pointed to by our map_ include all
	 * of the fmap elements that point to this range.
	 */
	bool repOkMapInclusive(GWHit<index_t, T>& hit, index_t range) const {
		for(size_t i = 0; i < hit.fmap.size(); i++) {
			if(hit.satup->offs[i] == (index_t)OFF_MASK) {
				if(range == hit.fmap[i].first) {
					ASSERT_ONLY(bool found = false);
					for(size_t j = mapi_; j < map_.size(); j++) {
						if(map(j) == i) {
							ASSERT_ONLY(found = true);
							break;
						}
					}
					assert(found);
				}
			}
		}
		return true;
	}
	
	/**
	 * Check that no two elements in map_ are the same.
	 */
	bool repOkMapRepeats() const {
		for(size_t i = mapi_; i < map_.size(); i++) {
			for(size_t j = i+1; j < map_.size(); j++) {
				assert_neq(map_[i], map_[j]);
			}
		}
		return true;
	}
#endif
	
	/**
	 * Return the offset currently assigned to the ith element.  If it
	 * has not yet been resolved, return 0xffffffff.
	 */
	index_t off(
				index_t i,
				const SARangeWithOffs<T>& sa)
	{
		assert_geq(i, mapi_);
		assert_lt(i, map_.size());
		assert_lt(map_[i], sa.offs.size());
		return sa.offs.get(map_[i]);
	}

	/**
	 * Return the offset of the element within the original range's
	 * PListSlice that the ith element of this range corresponds to.
	 */
	index_t map(index_t i) const {
		assert_geq(i, mapi_);
		assert_lt(i, map_.size());
		return map_[i];
	}

	/**
	 * Return the offset of the first untrimmed offset in the map.
	 */
	index_t mapi() const {
		return mapi_;
	}

	/**
	 * Return number of active elements in the range being tracked by
	 * this GWState.
	 */
	index_t size() const {
		return map_.size() - mapi_;
	}
	
	/**
	 * Return true iff all elements in this leaf range have been
	 * resolved.
	 */
	bool done() const {
		return size() == 0;
	}

	/**
	 * Set the PListSlice element that corresponds to the ith element
	 * of 'map' to the specified offset.
	 */
	void setOff(
		index_t i,
		index_t off,
		SARangeWithOffs<T>& sa,
		WalkMetrics& met)
	{
		assert_lt(i + mapi_, map_.size());
		assert_lt(map_[i + mapi_], sa.offs.size());
		size_t saoff = map_[i + mapi_];
		sa.offs[saoff] = off;
		assert_eq(off, sa.offs[saoff]);
	}

	/**
	 * Advance this GWState by one step (i.e. one BW operation).  In
	 * the event of a "split", more elements are added to the EList
	 * 'st', which must have room for at least 3 more elements without
	 * needing another expansion.  If an expansion of 'st' is
	 * triggered, this GWState object becomes invalid.
	 *
	 * Returns a pair of numbers, the first being the number of
	 * resolved but unreported offsets found during this advance, the
	 * second being the number of as-yet-unresolved offsets.
	 */
	template <int S>
	pair<int, int> advance(
		const Ebwt<index_t>& ebwt,   // the forward Bowtie index, for stepping left
		const BitPairReference& ref, // bitpair-encoded reference
		SARangeWithOffs<T>& sa,      // SA range with offsets
		GWHit<index_t, T>& hit,      // the associated GWHit object
		index_t range,               // which range is this?
		bool reportList,             // if true, "report" resolved offsets immediately by adding them to 'res' list
		EList<WalkResult<index_t>, 16>* res,  // EList where resolved offsets should be appended
		EList<GWState, S>& st,       // EList of GWStates for range being advanced
		GroupWalkState<index_t>& gws,         // temporary storage for masks
		WalkMetrics& met,
		PerReadMetrics& prm)
	{
		ASSERT_ONLY(index_t origTop = top);
		ASSERT_ONLY(index_t origBot = bot);
		assert_geq(step, 0);
		assert_eq(step, lastStep_);
		assert_geq(st.capacity(), st.size() + 4);
		assert(tloc.valid()); assert(tloc.repOk(ebwt.eh()));
		assert_eq(bot-top, (index_t)(map_.size()-mapi_));
		pair<int, int> ret = make_pair(0, 0);
		assert_eq(top, tloc.toBWRow());
		if(bloc.valid()) {
			// Still multiple elements being tracked
			assert_lt(top+1, bot);
			index_t upto[4], in[4];
			upto[0] = in[0] = upto[1] = in[1] =
			upto[2] = in[2] = upto[3] = in[3] = 0;
			assert_eq(bot, bloc.toBWRow());
			met.bwops++;
			prm.nExFmops++;
			// Assert that there's not a dollar sign in the middle of
			// this range
			assert(bot <= ebwt._zOff || top > ebwt._zOff);
			ebwt.mapLFRange(tloc, bloc, bot-top, upto, in, gws.masks);
#ifndef NDEBUG
			for(int i = 0; i < 4; i++) {
			  assert_eq(bot-top, (index_t)(gws.masks[i].size()));
			}
#endif
			bool first = true;
			ASSERT_ONLY(index_t sum = 0);
			index_t newtop = 0, newbot = 0;
			gws.map.clear();
			for(int i = 0; i < 4; i++) {
				if(in[i] > 0) {
					// Non-empty range resulted
					if(first) {
						// For the first one, 
						first = false;
						newtop = upto[i];
						newbot = newtop + in[i];
						assert_leq(newbot-newtop, bot-top);
						// Range narrowed so we have to look at the masks
						for(size_t j = 0; j < gws.masks[i].size(); j++) {
							assert_lt(j+mapi_, map_.size());
							if(gws.masks[i][j]) {
								gws.map.push_back(map_[j+mapi_]);
								assert(gws.map.size() <= 1 || gws.map.back() != gws.map[gws.map.size()-2]);
#ifndef NDEBUG
								// If this element is not yet resolved,
								// then check that it really is the
								// expected number of steps to the left
								// of the corresponding element in the
								// root range
								assert_lt(gws.map.back(), sa.size());
								if(sa.offs[gws.map.back()] == (index_t)OFF_MASK) {
									assert_eq(newtop + gws.map.size() - 1,
											  ebwt.walkLeft(sa.topf + gws.map.back(), step+1));
								}
#endif
							}
						}
 						assert_eq(newbot-newtop, (index_t)(gws.map.size()));
					} else {
						// For each beyond the first, create a new
						// GWState and add it to the GWState list. 
						// NOTE: this can cause the underlying list to
						// be expanded which in turn might leave 'st'
						// pointing to bad memory.
						st.expand();
						st.back().reset();
						index_t ntop = upto[i];
						index_t nbot = ntop + in[i];
						assert_lt(nbot-ntop, bot-top);
						st.back().mapi_ = 0;
						st.back().map_.clear();
						met.branches++;
						// Range narrowed so we have to look at the masks
						for(size_t j = 0; j < gws.masks[i].size(); j++) {
							if(gws.masks[i][j]) st.back().map_.push_back(map_[j+mapi_]);
						}
						pair<int, int> rret =
						st.back().init(
							ebwt,        // forward Bowtie index
							ref,         // bitpair-encodede reference
							sa,          // SA range with offsets
							st,          // EList of all GWStates associated with original range
							hit,         // associated GWHit object
							(index_t)st.size()-1, // range offset
							reportList,  // if true, report hits to 'res' list
							res,         // report hits here if reportList is true
							ntop,        // BW top of new range
							nbot,        // BW bot of new range
							step+1,      // # steps taken to get to this new range
							met);        // update these metrics
						ret.first += rret.first;
						ret.second += rret.second;
					}
					ASSERT_ONLY(sum += in[i]);
				}
			}
			mapi_ = 0;
			assert_eq(bot-top, sum);
			assert_gt(newbot, newtop);
			assert_leq(newbot-newtop, bot-top);
			assert(top != newtop || bot != newbot);
			//assert(!(newtop < top && newbot > top));
			top = newtop;
			bot = newbot;
			if(!gws.map.empty()) {
				map_ = gws.map;
			}
			//assert(repOkMapRepeats());
			//assert(repOkMapInclusive(hit, range));
			assert_eq(bot-top, (index_t)map_.size());
		} else {
			// Down to one element
			assert_eq(bot, top+1);
			assert_eq(1, map_.size()-mapi_);
			// Sets top, returns char walked through (which we ignore)
			ASSERT_ONLY(index_t oldtop = top);
			met.bwops++;
			prm.nExFmops++;
			ebwt.mapLF1(top, tloc);
			assert_neq(top, oldtop);
			bot = top+1;
			if(mapi_ > 0) {
				map_[0] = map_[mapi_];
				mapi_ = 0;
			}
			map_.resize(1);
		}
		assert(top != origTop || bot != origBot);
		step++;
		assert_gt(step, 0);
		assert_leq((index_t)step, ebwt.eh().len());
		pair<int, int> rret =
		init<S>(
			ebwt,       // forward Bowtie index
			ref,        // bitpair-encodede reference
			sa,         // SA range with offsets
			st,         // EList of all GWStates associated with original range
			hit,        // associated GWHit object
			range,      // range offset
			reportList, // if true, report hits to 'res' list
			res,        // report hits here if reportList is true
			met);       // update these metrics
		ret.first += rret.first;
		ret.second += rret.second;
		return ret;
	}

	/**
	 * Clear all state in preparation for the next walk.
	 */
	void reset() {
		top = bot = step = mapi_ = 0;
		ASSERT_ONLY(lastStep_ = -1);
		ASSERT_ONLY(inited_ = false);
		tloc.invalidate();
		bloc.invalidate();
		map_.clear();
	}
	
	/**
	 * Resize the map_ field to the given size.
	 */
	void initMap(size_t newsz) {
		mapi_ = 0;
		map_.resize(newsz);
		for(size_t i = 0; i < newsz; i++) {
			map_[i] = (index_t)i;
		}
	}

	/**
	 * Return true iff all rows corresponding to this GWState have been
	 * resolved and reported.
	 */
	bool doneReporting(const GWHit<index_t, T>& hit) const {
		for(size_t i = mapi_; i < map_.size(); i++) {
			if(!hit.reported(map(i))) return false;
		}
		return true;
	}

	/**
	 * Return true iff all rows corresponding to this GWState have been
	 * resolved (but not necessarily reported).
	 */
	bool doneResolving(const SARangeWithOffs<T>& sa) const {
		for(size_t i = mapi_; i < map_.size(); i++) {
			if(sa.offs[map(i)] == (index_t)OFF_MASK) return false;
		}
		return true;
	}

	SideLocus<index_t> tloc;      // SideLocus for top
	SideLocus<index_t> bloc;      // SideLocus for bottom
	index_t            top;       // top elt of range in BWT
	index_t            bot;       // bot elt of range in BWT
	int                step;      // how many steps have we walked to the left so far

protected:
	
	ASSERT_ONLY(bool inited_);
	ASSERT_ONLY(int lastStep_);
	EList<index_t, 16> map_; // which elts in range 'range' we're tracking
	index_t mapi_;           // first untrimmed element of map
};

template<typename index_t, typename T, int S>
class GroupWalk2S {
public:
	typedef EList<GWState<index_t, T>, S> TStateV;

	GroupWalk2S() : st_(8, GW_CAT) {
		reset();
	}
	
	/**
	 * Reset the GroupWalk in preparation for the next SeedResults.
	 */
	void reset() {
		elt_ = rep_ = 0;
		ASSERT_ONLY(inited_ = false);
	}

	/**
	 * Initialize a new group walk w/r/t a QVal object.
	 */
	void init(
		const Ebwt<index_t>& ebwtFw, // forward Bowtie index for walking left
		const BitPairReference& ref, // bitpair-encoded reference
		SARangeWithOffs<T>& sa,      // SA range with offsets
		RandomSource& rnd,           // pseudo-random generator for sampling rows
		WalkMetrics& met)            // update metrics here
	{
		reset();
#ifndef NDEBUG
		inited_ = true;
#endif
		// Init GWHit
		hit_.init(sa, 0, false, 0);
		// Init corresponding GWState
		st_.resize(1);
		st_.back().reset();
		assert(st_.back().repOkBasic());
		index_t top = sa.topf;
		index_t bot = (index_t)(top + sa.size());
		st_.back().initMap(bot-top);
		st_.ensure(4);
		st_.back().init(
			ebwtFw,             // Bowtie index
			ref,                // bitpair-encoded reference
			sa,                 // SA range with offsets
			st_,                // EList<GWState>
			hit_,               // GWHit
			0,                  // range 0
			false,              // put resolved elements into res_?
			NULL,               // put resolved elements here
			top,                // BW row at top
			bot,                // BW row at bot
			0,                  // # steps taken
			met);               // update metrics here
		elt_ += sa.size();
		assert(hit_.repOk(sa));
	}

	//
	// ELEMENT-BASED
	//

	/**
	 * Advance the GroupWalk until all elements have been resolved.
	 * FIXME FB: Commented as the types of advanceElements do not correlate with the types of the function definition.
	 */
//	void resolveAll(WalkMetrics& met, PerReadMetrics& prm) {
//		WalkResult<index_t> res; // ignore results for now
//		for(size_t i = 0; i < elt_; i++) {
//			advanceElement((index_t)i, res, met, prm);
//		}
//	}

	/**
	 * Advance the GroupWalk until the specified element has been
	 * resolved.
	 */
	bool advanceElement(
		index_t elt,                  // element within the range
		const Ebwt<index_t>& ebwtFw,  // forward Bowtie index for walking left
		const BitPairReference& ref,  // bitpair-encoded reference
		SARangeWithOffs<T>& sa,       // SA range with offsets
		GroupWalkState<index_t>& gws, // GroupWalk state; scratch space
		WalkResult<index_t>& res,     // put the result here
		WalkMetrics& met,             // metrics
		PerReadMetrics& prm)          // per-read metrics
	{
		assert(inited_);
		assert(!done());
		assert(hit_.repOk(sa));
		assert_lt(elt, sa.size()); // elt must fall within range
		// Until we've resolved our element of interest...
		while(sa.offs[elt] == (index_t)OFF_MASK) {
			// Get the GWState that contains our element of interest
			size_t range = hit_.fmap[elt].first;
			st_.ensure(4);
			GWState<index_t, T>& st = st_[range];
			assert(!st.doneResolving(sa));
			// Returns a pair of numbers, the first being the number of
			// resolved but unreported offsets found during this advance, the
			// second being the number of as-yet-unresolved offsets.
			st.advance(
				ebwtFw,
				ref,
				sa,
				hit_,
				(index_t)range,
				false,
				NULL,
				st_,
				gws,
				met,
				prm);
			assert(sa.offs[elt] != (index_t)OFF_MASK ||
			       !st_[hit_.fmap[elt].first].doneResolving(sa));
		}
		assert_neq((index_t)OFF_MASK, sa.offs[elt]);
		// Report it!
		if(!hit_.reported(elt)) {
			hit_.setReported(elt);
		}
		met.reports++;
		res.init(
			0,              // seed offset
			false,          // orientation
			0,              // range
			elt,            // element
			sa.topf + elt,  // bw row
			(index_t)sa.len, // length of hit
			sa.offs[elt]);  // resolved text offset
		rep_++;
		return true;
	}

	/**
	 * Return true iff all elements have been resolved and reported.
	 */
	bool done() const { return rep_ == elt_; }
	
#ifndef NDEBUG
	/**
	 * Check that GroupWalk is internally consistent.
	 */
	bool repOk(const SARangeWithOffs<T>& sa) const {
		assert(hit_.repOk(sa));
		assert_leq(rep_, elt_);
		// This is a lot of work
		size_t resolved = 0, reported = 0;
		// For each element
		const size_t sz = sa.size();
		for(size_t m = 0; m < sz; m++) {
			// Is it resolved?
			if(sa.offs[m] != (index_t)OFF_MASK) {
				resolved++;
			} else {
				assert(!hit_.reported(m));
			}
			// Is it reported?
			if(hit_.reported(m)) {
				reported++;
			}
			assert_geq(resolved, reported);
		}
		assert_geq(resolved, reported);
		assert_eq(rep_, reported);
		assert_eq(elt_, sz);
		return true;
	}
#endif

	/**
	 * Return the number of BW elements that we can resolve.
	 */
	index_t numElts() const { return elt_; }
	
	/**
	 * Return the size occupied by this GroupWalk and all its constituent
	 * objects.
	 */
	size_t totalSizeBytes() const {
		return 2 * sizeof(size_t) + st_.totalSizeBytes() + sizeof(GWHit<index_t, T>);
	}
	/**
	 * Return the capacity of this GroupWalk and all its constituent objects.
	 */
	size_t totalCapacityBytes() const {
		return 2 * sizeof(size_t) + st_.totalCapacityBytes() + sizeof(GWHit<index_t, T>);
	}
	
#ifndef NDEBUG
	bool initialized() const { return inited_; }
#endif
	
protected:

	ASSERT_ONLY(bool inited_);    // initialized?
	
	index_t elt_;    // # BW elements under the control of the GropuWalk
	index_t rep_;    // # BW elements reported

	// For each orientation and seed offset, keep a GWState object that
	// holds the state of the walk so far.
	TStateV st_;

	// For each orientation and seed offset, keep an EList of GWHit.
	GWHit<index_t, T> hit_;
};

#endif /*GROUP_WALK_H_*/