File: bt2_idx.h

package info (click to toggle)
bowtie2 2.3.4.3-1
links: PTS, VCS
area: main
in suites: buster
size: 26,404 kB
sloc: cpp: 59,921; perl: 7,423; sh: 1,061; python: 958; makefile: 488; ansic: 122
file content (3090 lines) | stat: -rw-r--r-- 96,717 bytes
/*
 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
 *
 * This file is part of Bowtie 2.
 *
 * Bowtie 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Bowtie 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef EBWT_H_
#define EBWT_H_

#include <stdint.h>
#include <string.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <memory>
#include <fcntl.h>
#include <math.h>
#include <errno.h>
#include <stdexcept>
#include <sys/stat.h>
#ifdef BOWTIE_MM
#include <sys/mman.h>
#include <sys/shm.h>
#endif
#include "shmem.h"
#include "alphabet.h"
#include "assert_helpers.h"
#include "bitpack.h"
#include "blockwise_sa.h"
#include "endian_swap.h"
#include "word_io.h"
#include "random_source.h"
#include "ref_read.h"
#include "threading.h"
#include "str_util.h"
#include "mm.h"
#include "timer.h"
#include "reference.h"
#include "search_globals.h"
#include "ds.h"
#include "random_source.h"
#include "mem_ids.h"
#include "btypes.h"

#ifdef POPCNT_CAPABILITY 
    #include "processor_support.h" 
#endif 

using namespace std;

// From ccnt_lut.cpp, automatically generated by gen_lookup_tables.pl
extern uint8_t cCntLUT_4[4][4][256];

static const uint64_t c_table[4] = {
	0xffffffffffffffff,
	0xaaaaaaaaaaaaaaaa,
	0x5555555555555555,
	0x0000000000000000
};

#ifndef VMSG_NL
#define VMSG_NL(...) \
if(this->verbose()) { \
	stringstream tmp; \
	tmp << __VA_ARGS__ << endl; \
	this->verbose(tmp.str()); \
}
#endif

#ifndef VMSG
#define VMSG(...) \
if(this->verbose()) { \
	stringstream tmp; \
	tmp << __VA_ARGS__; \
	this->verbose(tmp.str()); \
}
#endif

/**
 * Flags describing type of Ebwt.
 */
enum EBWT_FLAGS {
	EBWT_COLOR = 2,     // true -> Ebwt is colorspace
	EBWT_ENTIRE_REV = 4 // true -> reverse Ebwt is the whole
	                    // concatenated string reversed, rather than
						// each stretch reversed
};

/**
 * Extended Burrows-Wheeler transform header.  This together with the
 * actual data arrays and other text-specific parameters defined in
 * class Ebwt constitute the entire Ebwt.
 */
class EbwtParams {

public:
	EbwtParams() { }

	EbwtParams(
		TIndexOffU len,
		int32_t lineRate,
		int32_t offRate,
		int32_t ftabChars,
		bool color,
		bool entireReverse)
	{
		init(len, lineRate, offRate, ftabChars, color, entireReverse);
	}

	EbwtParams(const EbwtParams& eh) {
		init(eh._len, eh._lineRate, eh._offRate,
		     eh._ftabChars, eh._color, eh._entireReverse);
	}

	void init(
		TIndexOffU len,
		int32_t lineRate,
		int32_t offRate,
		int32_t ftabChars,
		bool color,
		bool entireReverse)
	{
		_color = color;
		_entireReverse = entireReverse;
		_len = len;
		_bwtLen = _len + 1;
		_sz = (len+3)/4;
		_bwtSz = (len/4 + 1);
		_lineRate = lineRate;
		_origOffRate = offRate;
		_offRate = offRate;
		_offMask = OFF_MASK << _offRate;
		_ftabChars = ftabChars;
		_eftabLen = _ftabChars*2;
		_eftabSz = _eftabLen*OFF_SIZE;
		_ftabLen = (1 << (_ftabChars*2))+1;
		_ftabSz = _ftabLen*OFF_SIZE;
		_offsLen = (_bwtLen + (1 << _offRate) - 1) >> _offRate;
		_offsSz = (uint64_t)_offsLen*OFF_SIZE;
		_lineSz = 1 << _lineRate;
		_sideSz = _lineSz * 1 /* lines per side */;
		_sideBwtSz = _sideSz - OFF_SIZE*4;
		_sideBwtLen = _sideBwtSz*4;
		_numSides = (_bwtSz+(_sideBwtSz)-1)/(_sideBwtSz);
		_numLines = _numSides * 1 /* lines per side */;
		_ebwtTotLen = _numSides * _sideSz;
		_ebwtTotSz = _ebwtTotLen;
		assert(repOk());
	}

	TIndexOffU len() const           { return _len; }
	TIndexOffU lenNucs() const       { return _len + (_color ? 1 : 0); }
	TIndexOffU bwtLen() const        { return _bwtLen; }
	TIndexOffU sz() const            { return _sz; }
	TIndexOffU bwtSz() const         { return _bwtSz; }
	int32_t   lineRate() const      { return _lineRate; }
	int32_t   origOffRate() const   { return _origOffRate; }
	int32_t   offRate() const       { return _offRate; }
	TIndexOffU offMask() const       { return _offMask; }
	int32_t   ftabChars() const     { return _ftabChars; }
	int32_t eftabLen() const      { return _eftabLen; } 
	int32_t eftabSz() const       { return _eftabSz; } 
	TIndexOffU ftabLen() const       { return _ftabLen; }
	TIndexOffU ftabSz() const        { return _ftabSz; }
	TIndexOffU offsLen() const       { return _offsLen; }
	uint64_t offsSz() const        { return _offsSz; }
	int32_t lineSz() const        { return _lineSz; } 
	int32_t sideSz() const        { return _sideSz; } 
	int32_t sideBwtSz() const     { return _sideBwtSz; } 
	int32_t sideBwtLen() const    { return _sideBwtLen; } 
	TIndexOffU numSides() const      { return _numSides; }
	TIndexOffU numLines() const      { return _numLines; }
	TIndexOffU ebwtTotLen() const    { return _ebwtTotLen; }
	TIndexOffU ebwtTotSz() const     { return _ebwtTotSz; }
	bool color() const             { return _color; }
	bool entireReverse() const     { return _entireReverse; }

	/**
	 * Set a new suffix-array sampling rate, which involves updating
	 * rate, mask, sample length, and sample size.
	 */
	void setOffRate(int __offRate) {
		_offRate = __offRate;
		_offMask = OFF_MASK << _offRate;
		_offsLen = (_bwtLen + (1 << _offRate) - 1) >> _offRate;
		_offsSz = (uint64_t)_offsLen * OFF_SIZE;
	}

#ifndef NDEBUG
	/// Check that this EbwtParams is internally consistent
	bool repOk() const {
		assert_gt(_len, 0);
		assert_gt(_lineRate, 3);
		assert_geq(_offRate, 0);
		assert_leq(_ftabChars, 16);
		assert_geq(_ftabChars, 1);
		assert_lt(_lineRate, 32);
		assert_lt(_ftabChars, 32);
		assert_eq(0, _ebwtTotSz % _lineSz);
		return true;
	}
#endif

	/**
	 * Pretty-print the header contents to the given output stream.
	 */
	void print(ostream& out) const {
		out << "Headers:" << endl
		    << "    len: "          << _len << endl
		    << "    bwtLen: "       << _bwtLen << endl
		    << "    sz: "           << _sz << endl
		    << "    bwtSz: "        << _bwtSz << endl
		    << "    lineRate: "     << _lineRate << endl
		    << "    offRate: "      << _offRate << endl
		    << "    offMask: 0x"    << hex << _offMask << dec << endl
		    << "    ftabChars: "    << _ftabChars << endl
		    << "    eftabLen: "     << _eftabLen << endl
		    << "    eftabSz: "      << _eftabSz << endl
		    << "    ftabLen: "      << _ftabLen << endl
		    << "    ftabSz: "       << _ftabSz << endl
		    << "    offsLen: "      << _offsLen << endl
		    << "    offsSz: "       << _offsSz << endl
		    << "    lineSz: "       << _lineSz << endl
		    << "    sideSz: "       << _sideSz << endl
		    << "    sideBwtSz: "    << _sideBwtSz << endl
		    << "    sideBwtLen: "   << _sideBwtLen << endl
		    << "    numSides: "     << _numSides << endl
		    << "    numLines: "     << _numLines << endl
		    << "    ebwtTotLen: "   << _ebwtTotLen << endl
		    << "    ebwtTotSz: "    << _ebwtTotSz << endl
		    << "    color: "        << _color << endl
		    << "    reverse: "      << _entireReverse << endl;
	}

	TIndexOffU _len;
	TIndexOffU _bwtLen;
	TIndexOffU _sz;
	TIndexOffU _bwtSz;
	int32_t  _lineRate;
	int32_t  _origOffRate;
	int32_t  _offRate;
	TIndexOffU _offMask;
	int32_t  _ftabChars;
	uint32_t _eftabLen;
	uint32_t _eftabSz;
	TIndexOffU _ftabLen; 
	TIndexOffU _ftabSz; 
	TIndexOffU _offsLen;
	uint64_t _offsSz;
	uint32_t _lineSz; 
	uint32_t _sideSz; 
	uint32_t _sideBwtSz; 
	uint32_t _sideBwtLen; 
	TIndexOffU _numSides;
	TIndexOffU _numLines;
	TIndexOffU _ebwtTotLen;
	TIndexOffU _ebwtTotSz;
	bool     _color;
	bool     _entireReverse;
};

/**
 * Exception to throw when a file-realted error occurs.
 */
class EbwtFileOpenException : public std::runtime_error {
public:
	EbwtFileOpenException(const std::string& msg = "") :
		std::runtime_error(msg) { }
};

/**
 * Calculate size of file with given name.
 */
static inline int64_t fileSize(const char* name) {
	std::ifstream f;
	f.open(name, std::ios_base::binary | std::ios_base::in);
	if (!f.good() || f.eof() || !f.is_open()) { return 0; }
	f.seekg(0, std::ios_base::beg);
	std::ifstream::pos_type begin_pos = f.tellg();
	f.seekg(0, std::ios_base::end);
	return static_cast<int64_t>(f.tellg() - begin_pos);
}

/**
 * Encapsulates a location in the bwt text in terms of the side it
 * occurs in and its offset within the side.
 */
struct SideLocus {
	SideLocus() :
	_sideByteOff(0),
	_sideNum(0),
	_charOff(0),
	_by(-1),
	_bp(-1) { }

	/**
	 * Construct from row and other relevant information about the Ebwt.
	 */
	SideLocus(TIndexOffU row, const EbwtParams& ep, const uint8_t* ebwt) {
		initFromRow(row, ep, ebwt);
	}

	/**
	 * Init two SideLocus objects from a top/bot pair, using the result
	 * from one call to initFromRow to possibly avoid a second call.
	 */
	static void initFromTopBot(
		TIndexOffU top,
		TIndexOffU bot,
		const EbwtParams& ep,
		const uint8_t* ebwt,
		SideLocus& ltop,
		SideLocus& lbot)
	{
		const TIndexOffU sideBwtLen = ep._sideBwtLen;
		assert_gt(bot, top);
		ltop.initFromRow(top, ep, ebwt);
		TIndexOffU spread = bot - top;
		// Many cache misses on the following lines
		if(ltop._charOff + spread < sideBwtLen) {
			lbot._charOff = (uint32_t)(ltop._charOff + spread);
			lbot._sideNum = ltop._sideNum;
			lbot._sideByteOff = ltop._sideByteOff;
			lbot._by = lbot._charOff >> 2;
			assert_lt(lbot._by, (int)ep._sideBwtSz);
			lbot._bp = lbot._charOff & 3;
		} else {
			lbot.initFromRow(bot, ep, ebwt);
		}
	}

	/**
	 * Calculate SideLocus based on a row and other relevant
	 * information about the shape of the Ebwt.
	 */
	void initFromRow(TIndexOffU row, const EbwtParams& ep, const uint8_t* ebwt) {
		const int32_t sideSz     = ep._sideSz;
		// Side length is hard-coded for now; this allows the compiler
		// to do clever things to accelerate / and %.
		_sideNum                  = row / (48*OFF_SIZE);
		assert_lt(_sideNum, ep._numSides);
		_charOff                  = row % (48*OFF_SIZE);
		_sideByteOff              = _sideNum * sideSz;
		assert_leq(row, ep._len);
		assert_leq(_sideByteOff + sideSz, ep._ebwtTotSz);
		// Tons of cache misses on the next line
		_by = _charOff >> 2; // byte within side
		assert_lt(_by, (int)ep._sideBwtSz);
		_bp = _charOff & 3;  // bit-pair within byte
	}
	
	/**
	 * Transform this SideLocus to refer to the next side (i.e. the one
	 * corresponding to the next side downstream).  Set all cursors to
	 * point to the beginning of the side.
	 */
	void nextSide(const EbwtParams& ep) {
		assert(valid());
		_sideByteOff += ep.sideSz();
		_sideNum++;
		_by = _bp = _charOff = 0;
		assert(valid());
	}

	/**
	 * Return true iff this is an initialized SideLocus
	 */
	bool valid() const {
		if(_bp != -1) {
			return true;
		}
		return false;
	}
	
	/**
	 * Convert locus to BW row it corresponds to.
	 */
	TIndexOffU toBWRow() const {
		return _sideNum * 48*OFF_SIZE + _charOff;
	}
	
#ifndef NDEBUG
	/**
	 * Check that SideLocus is internally consistent and consistent
	 * with the (provided) EbwtParams.
	 */
	bool repOk(const EbwtParams& ep) const {
		ASSERT_ONLY(TIndexOffU row = _sideNum * 48*OFF_SIZE + _charOff);
		assert_leq(row, ep._len);
		assert_range(-1, 3, _bp);
		assert_range(0, (int)ep._sideBwtSz, _by);
		return true;
	}
#endif

	/// Make this look like an invalid SideLocus
	void invalidate() {
		_bp = -1;
	}

	/**
	 * Return a read-only pointer to the beginning of the top side.
	 */
	const uint8_t *side(const uint8_t* ebwt) const {
		return ebwt + _sideByteOff;
	}

	TIndexOffU _sideByteOff; // offset of top side within ebwt[]
	TIndexOffU _sideNum;     // index of side
	uint32_t _charOff;      // character offset within side
	int32_t _by;            // byte within side (not adjusted for bw sides)
	int32_t _bp;            // bitpair within byte (not adjusted for bw sides)
};
#ifdef POPCNT_CAPABILITY   // wrapping of "struct"
struct USE_POPCNT_GENERIC {
#endif
// Use this standard bit-bashing population count
inline static int pop64(uint64_t x) {
	// Lots of cache misses on following lines (>10K)
	x = x - ((x >> 1) & 0x5555555555555555llu);
	x = (x & 0x3333333333333333llu) + ((x >> 2) & 0x3333333333333333llu);
	x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0Fllu;
	x = x + (x >> 8);
	x = x + (x >> 16);
	x = x + (x >> 32);
	return (int)(x & 0x3Fllu);
}
#ifdef POPCNT_CAPABILITY  // wrapping a "struct"
};
#endif

#ifdef POPCNT_CAPABILITY
    struct USE_POPCNT_INSTRUCTION {
        inline static int pop64(uint64_t x) {
            int64_t count;
            asm ("popcnt %[x],%[count]\n": [count] "=&r" (count): [x] "r" (x));
            return count;
        }
    };
#endif

/**
 * Tricky-bit-bashing bitpair counting for given two-bit value (0-3)
 * within a 64-bit argument.
 */
#ifdef POPCNT_CAPABILITY
template<typename Operation>
#endif
inline static int countInU64(int c, uint64_t dw) {
	uint64_t c0 = c_table[c];
	uint64_t x0 = dw ^ c0;
	uint64_t x1 = (x0 >> 1);
	uint64_t x2 = x1 & (0x5555555555555555);
	uint64_t x3 = x0 & x2;
#ifdef POPCNT_CAPABILITY
    uint64_t tmp = Operation().pop64(x3);
#else
    uint64_t tmp = pop64(x3);
#endif
	return (int) tmp;
}

// Forward declarations for Ebwt class
class EbwtSearchParams;

/**
 * Extended Burrows-Wheeler transform data.
 *
 * An Ebwt may be transferred to and from RAM with calls to
 * evictFromMemory() and loadIntoMemory().  By default, a newly-created
 * Ebwt is not loaded into memory; if the user would like to use a
 * newly-created Ebwt to answer queries, they must first call
 * loadIntoMemory().
 */
class Ebwt {
public:
	#define Ebwt_INITS \
	    _toBigEndian(currentlyBigEndian()), \
	    _overrideOffRate(overrideOffRate), \
	    _verbose(verbose), \
	    _passMemExc(passMemExc), \
	    _sanity(sanityCheck), \
	    fw_(fw), \
	    _in1(NULL), \
	    _in2(NULL), \
	    _zOff(OFF_MASK), \
	    _zEbwtByteOff(OFF_MASK), \
	    _zEbwtBpOff(-1), \
	    _nPat(0), \
	    _nFrag(0), \
	    _plen(EBWT_CAT), \
	    _rstarts(EBWT_CAT), \
	    _fchr(EBWT_CAT), \
	    _ftab(EBWT_CAT), \
	    _eftab(EBWT_CAT), \
	    _offs(EBWT_CAT), \
	    _ebwt(EBWT_CAT), \
	    _useMm(false), \
	    useShmem_(false), \
	    _refnames(EBWT_CAT), \
	    mmFile1_(NULL), \
	    mmFile2_(NULL)

	/// Construct an Ebwt from the given input file
	Ebwt(const string& in,
	     int color,
		 int needEntireReverse,
	     bool fw,
	     int32_t overrideOffRate, // = -1,
	     int32_t offRatePlus, // = -1,
	     bool useMm, // = false,
	     bool useShmem, // = false,
	     bool mmSweep, // = false,
	     bool loadNames, // = false,
		 bool loadSASamp, // = true,
		 bool loadFtab, // = true,
		 bool loadRstarts, // = true,
	     bool verbose, // = false,
	     bool startVerbose, // = false,
	     bool passMemExc, // = false,
	     bool sanityCheck) : // = false) :
	     Ebwt_INITS
	{
		assert(!useMm || !useShmem);
#ifdef POPCNT_CAPABILITY 
        ProcessorSupport ps; 
        _usePOPCNTinstruction = ps.POPCNTenabled(); 
#endif 
		packed_ = false;
		_useMm = useMm;
		useShmem_ = useShmem;
		_in1Str = in + ".1." + gEbwt_ext;
		_in2Str = in + ".2." + gEbwt_ext;
		readIntoMemory(
			color,       // expect index to be colorspace?
			fw ? -1 : needEntireReverse, // need REF_READ_REVERSE
			loadSASamp,  // load the SA sample portion?
			loadFtab,    // load the ftab & eftab?
			loadRstarts, // load the rstarts array?
			true,        // stop after loading the header portion?
			&_eh,        // params
			mmSweep,     // mmSweep
			loadNames,   // loadNames
			startVerbose); // startVerbose
		// If the offRate has been overridden, reflect that in the
		// _eh._offRate field
		if(offRatePlus > 0 && _overrideOffRate == -1) {
			_overrideOffRate = _eh._offRate + offRatePlus;
		}
		if(_overrideOffRate > _eh._offRate) {
			_eh.setOffRate(_overrideOffRate);
			assert_eq(_overrideOffRate, _eh._offRate);
		}
		assert(repOk());
	}

	/// Construct an Ebwt from the given header parameters and string
	/// vector, optionally using a blockwise suffix sorter with the
	/// given 'bmax' and 'dcv' parameters.  The string vector is
	/// ultimately joined and the joined string is passed to buildToDisk().
	template<typename TStr>
	Ebwt(
		TStr exampleStr,
		bool packed,
		int color,
		int needEntireReverse,
		int32_t lineRate,
		int32_t offRate,
		int32_t ftabChars,
        int nthreads,
		const string& file,   // base filename for EBWT files
		bool fw,
		bool useBlockwise,
		TIndexOffU bmax,
		TIndexOffU bmaxSqrtMult,
		TIndexOffU bmaxDivN,
		int dcv,
		EList<FileBuf*>& is,
		EList<RefRecord>& szs,
		TIndexOffU sztot,
		const RefReadInParams& refparams,
		uint32_t seed,
		int32_t overrideOffRate = -1,
		bool doSaFile = false,
		bool doBwtFile = false,
		bool verbose = false,
		bool passMemExc = false,
		bool sanityCheck = false) :
		Ebwt_INITS,
		_eh(
			joinedLen(szs),
			lineRate,
			offRate,
			ftabChars,
			color,
			refparams.reverse == REF_READ_REVERSE)
	{
#ifdef POPCNT_CAPABILITY 
        ProcessorSupport ps; 
        _usePOPCNTinstruction = ps.POPCNTenabled(); 
#endif 
		_in1Str = file + ".1." + gEbwt_ext;
		_in2Str = file + ".2." + gEbwt_ext;
		packed_ = packed;
		// Open output files
		ofstream fout1(_in1Str.c_str(), ios::binary);
		if(!fout1.good()) {
			cerr << "Could not open index file for writing: \"" << _in1Str.c_str() << "\"" << endl
			     << "Please make sure the directory exists and that permissions allow writing by" << endl
			     << "Bowtie." << endl;
			throw 1;
		}
		ofstream fout2(_in2Str.c_str(), ios::binary);
		if(!fout2.good()) {
			cerr << "Could not open index file for writing: \"" << _in2Str.c_str() << "\"" << endl
			     << "Please make sure the directory exists and that permissions allow writing by" << endl
			     << "Bowtie." << endl;
			throw 1;
		}
		_inSaStr = file + ".sa";
		_inBwtStr = file + ".bwt";
		ofstream *saOut = NULL, *bwtOut = NULL;
		if(doSaFile) {
			saOut = new ofstream(_inSaStr.c_str(), ios::binary);
			if(!saOut->good()) {
				cerr << "Could not open suffix-array file for writing: \"" << _inSaStr.c_str() << "\"" << endl
			         << "Please make sure the directory exists and that permissions allow writing by" << endl
			         << "Bowtie." << endl;
				throw 1;
			}
		}
		if(doBwtFile) {
			bwtOut = new ofstream(_inBwtStr.c_str(), ios::binary);
			if(!bwtOut->good()) {
				cerr << "Could not open suffix-array file for writing: \"" << _inBwtStr.c_str() << "\"" << endl
			         << "Please make sure the directory exists and that permissions allow writing by" << endl
			         << "Bowtie." << endl;
				throw 1;
			}
		}
		// Build SA(T) and BWT(T) block by block
		initFromVector<TStr>(
			is,
		    szs,
		    sztot,
		    refparams,
		    fout1,
		    fout2,
                             file,
			saOut,
			bwtOut,
            nthreads,
		    useBlockwise,
		    bmax,
		    bmaxSqrtMult,
		    bmaxDivN,
		    dcv,
		    seed,
		    verbose);
		// Close output files
		fout1.flush();
		
		int64_t tellpSz1 = (int64_t)fout1.tellp();
		VMSG_NL("Wrote " << fout1.tellp() << " bytes to primary EBWT file: " << _in1Str.c_str());
		fout1.close();
		bool err = false;
		if(tellpSz1 > fileSize(_in1Str.c_str())) {
			err = true;
			cerr << "Index is corrupt: File size for " << _in1Str.c_str() << " should have been " << tellpSz1
			     << " but is actually " << fileSize(_in1Str.c_str()) << "." << endl;
		}
		fout2.flush();
		
		int64_t tellpSz2 = (int64_t)fout2.tellp();
		VMSG_NL("Wrote " << fout2.tellp() << " bytes to secondary EBWT file: " << _in2Str.c_str());
		fout2.close();
		if(tellpSz2 > fileSize(_in2Str.c_str())) {
			err = true;
			cerr << "Index is corrupt: File size for " << _in2Str.c_str() << " should have been " << tellpSz2
			     << " but is actually " << fileSize(_in2Str.c_str()) << "." << endl;
		}
		
		if(saOut != NULL) {
			// Check on suffix array output file size
			int64_t tellpSzSa = (int64_t)saOut->tellp();
			VMSG_NL("Wrote " << tellpSzSa << " bytes to suffix-array file: " << _inSaStr.c_str());
			saOut->close();
			if(tellpSzSa > fileSize(_inSaStr.c_str())) {
				err = true;
				cerr << "Index is corrupt: File size for " << _inSaStr.c_str() << " should have been " << tellpSzSa
					 << " but is actually " << fileSize(_inSaStr.c_str()) << "." << endl;
			}
		}

		if(bwtOut != NULL) {
			// Check on suffix array output file size
			int64_t tellpSzBwt = (int64_t)bwtOut->tellp();
			VMSG_NL("Wrote " << tellpSzBwt << " bytes to BWT file: " << _inBwtStr.c_str());
			bwtOut->close();
			if(tellpSzBwt > fileSize(_inBwtStr.c_str())) {
				err = true;
				cerr << "Index is corrupt: File size for " << _inBwtStr.c_str() << " should have been " << tellpSzBwt
					 << " but is actually " << fileSize(_inBwtStr.c_str()) << "." << endl;
			}
		}
		
		if(err) {
			cerr << "Please check if there is a problem with the disk or if disk is full." << endl;
			throw 1;
		}
		
		// Reopen as input streams
		VMSG_NL("Re-opening _in1 and _in2 as input streams");
		if(_sanity) {
			VMSG_NL("Sanity-checking Bt2");
			assert(!isInMemory());
			readIntoMemory(
				color,                       // colorspace?
				fw ? -1 : needEntireReverse, // 1 -> need the reverse to be reverse-of-concat
				true,                        // load SA sample (_offs[])?
				true,                        // load ftab (_ftab[] & _eftab[])?
				true,                        // load r-starts (_rstarts[])?
				false,                       // just load header?
				NULL,                        // Params object to fill
				false,                       // mm sweep?
				true,                        // load names?
				false);                      // verbose startup?
			sanityCheckAll(refparams.reverse);
			evictFromMemory();
			assert(!isInMemory());
		}
		VMSG_NL("Returning from Ebwt constructor");
	}
	
	/**
	 * Static constructor for a pair of forward/reverse indexes for the
	 * given reference string.
	 */
	template<typename TStr>
	static pair<Ebwt*, Ebwt*>
	fromString(
		const char* str,
		bool packed,
		int color,
		int reverse,
		bool bigEndian,
		int32_t lineRate,
		int32_t offRate,
		int32_t ftabChars,
		const string& file,
		bool useBlockwise,
		TIndexOffU bmax,
		TIndexOffU bmaxSqrtMult,
		TIndexOffU bmaxDivN,
		int dcv,
		uint32_t seed,
		bool verbose,
		bool autoMem,
		bool sanity)
	{
		EList<std::string> strs(EBWT_CAT);
		strs.push_back(std::string(str));
		return fromStrings<TStr>(
			strs,
			packed,
			color,
			reverse,
			bigEndian,
			lineRate,
			offRate,
			ftabChars,
			file,
			useBlockwise,
			bmax,
			bmaxSqrtMult,
			bmaxDivN,
			dcv,
			seed,
			verbose,
			autoMem,
			sanity);
	}
	
	/**
	 * Static constructor for a pair of forward/reverse indexes for the
	 * given list of reference strings.
	 */
	template<typename TStr>
	static pair<Ebwt*, Ebwt*>
	fromStrings(
		const EList<std::string>& strs,
		bool packed,
		int color,
		int reverse,
		bool bigEndian,
		int32_t lineRate,
		int32_t offRate,
		int32_t ftabChars,
		const string& file,
		bool useBlockwise,
		TIndexOffU bmax,
		TIndexOffU bmaxSqrtMult,
		TIndexOffU bmaxDivN,
		int dcv,
		uint32_t seed,
		bool verbose,
		bool autoMem,
		bool sanity)
	{
		assert(!strs.empty());
		EList<FileBuf*> is(EBWT_CAT);
		RefReadInParams refparams(color, REF_READ_FORWARD, false, false);
		// Adapt sequence strings to stringstreams open for input
		auto_ptr<stringstream> ss(new stringstream());
		for(TIndexOffU i = 0; i < strs.size(); i++) {
			(*ss) << ">" << i << endl << strs[i] << endl;
		}
		auto_ptr<FileBuf> fb(new FileBuf(ss.get()));
		assert(!fb->eof());
		assert(fb->get() == '>');
		ASSERT_ONLY(fb->reset());
		assert(!fb->eof());
		is.push_back(fb.get());
		// Vector for the ordered list of "records" comprising the input
		// sequences.  A record represents a stretch of unambiguous
		// characters in one of the input sequences.
		EList<RefRecord> szs(EBWT_CAT);
		std::pair<TIndexOffU, TIndexOffU> sztot;
		sztot = BitPairReference::szsFromFasta(is, file, bigEndian, refparams, szs, sanity);
		// Construct Ebwt from input strings and parameters
		Ebwt *ebwtFw = new Ebwt(
			TStr(),
			packed,
			refparams.color ? 1 : 0,
			-1,           // fw
			lineRate,
			offRate,      // suffix-array sampling rate
			ftabChars,    // number of chars in initial arrow-pair calc
			file,         // basename for .?.ebwt files
			true,         // fw?
			useBlockwise, // useBlockwise
			bmax,         // block size for blockwise SA builder
			bmaxSqrtMult, // block size as multiplier of sqrt(len)
			bmaxDivN,     // block size as divisor of len
			dcv,          // difference-cover period
			is,           // list of input streams
			szs,          // list of reference sizes
			sztot.first,  // total size of all unambiguous ref chars
			refparams,    // reference read-in parameters
			seed,         // pseudo-random number generator seed
			-1,           // override offRate
			verbose,      // be talkative
			autoMem,      // pass exceptions up to the toplevel so that we can adjust memory settings automatically
			sanity);      // verify results and internal consistency
		refparams.reverse = reverse;
		szs.clear();
		sztot = BitPairReference::szsFromFasta(is, file, bigEndian, refparams, szs, sanity);
		// Construct Ebwt from input strings and parameters
		Ebwt *ebwtBw = new Ebwt(
			TStr(),
			packed,
			refparams.color ? 1 : 0,
			reverse == REF_READ_REVERSE,
			lineRate,
			offRate,      // suffix-array sampling rate
			ftabChars,    // number of chars in initial arrow-pair calc
			file + ".rev",// basename for .?.ebwt files
			false,        // fw?
			useBlockwise, // useBlockwise
			bmax,         // block size for blockwise SA builder
			bmaxSqrtMult, // block size as multiplier of sqrt(len)
			bmaxDivN,     // block size as divisor of len
			dcv,          // difference-cover period
			is,           // list of input streams
			szs,          // list of reference sizes
			sztot.first,  // total size of all unambiguous ref chars
			refparams,    // reference read-in parameters
			seed,         // pseudo-random number generator seed
			-1,           // override offRate
			verbose,      // be talkative
			autoMem,      // pass exceptions up to the toplevel so that we can adjust memory settings automatically
			sanity);      // verify results and internal consistency
		return make_pair(ebwtFw, ebwtBw);
	}
	
	/// Return true iff the Ebwt is packed
	bool isPacked() { return packed_; }

	/**
	 * Write the rstarts array given the szs array for the reference.
	 */
	void szsToDisk(const EList<RefRecord>& szs, ostream& os, int reverse);
	
	/**
	 * Helper for the constructors above.  Takes a vector of text
	 * strings and joins them into a single string with a call to
	 * joinToDisk, which does a join (with padding) and writes some of
	 * the resulting data directly to disk rather than keep it in
	 * memory.  It then constructs a suffix-array producer (what kind
	 * depends on 'useBlockwise') for the resulting sequence.  The
	 * suffix-array producer can then be used to obtain chunks of the
	 * joined string's suffix array.
	 */
	template <typename TStr>
	void initFromVector(EList<FileBuf*>& is,
	                    EList<RefRecord>& szs,
	                    TIndexOffU sztot,
	                    const RefReadInParams& refparams,
	                    ofstream& out1,
	                    ofstream& out2,
                        const string& outfile,
	                    ofstream* saOut,
	                    ofstream* bwtOut,
                        int nthreads,
	                    bool useBlockwise,
	                    TIndexOffU bmax,
	                    TIndexOffU bmaxSqrtMult,
	                    TIndexOffU bmaxDivN,
	                    int dcv,
	                    uint32_t seed,
	                    bool verbose)
	{
		// Compose text strings into single string
		VMSG_NL("Calculating joined length");
		TStr s; // holds the entire joined reference after call to joinToDisk
		TIndexOffU jlen;
		jlen = joinedLen(szs);
		assert_geq(jlen, sztot);
		VMSG_NL("Writing header");
		writeFromMemory(true, out1, out2);
		try {
			VMSG_NL("Reserving space for joined string");
			s.resize(jlen);
			VMSG_NL("Joining reference sequences");
			if(refparams.reverse == REF_READ_REVERSE) {
				{
					Timer timer(cout, "  Time to join reference sequences: ", _verbose);
					joinToDisk(is, szs, sztot, refparams, s, out1, out2);
				} {
					Timer timer(cout, "  Time to reverse reference sequence: ", _verbose);
					EList<RefRecord> tmp(EBWT_CAT);
					s.reverse();
					reverseRefRecords(szs, tmp, false, verbose);
					szsToDisk(tmp, out1, refparams.reverse);
				}
			} else {
				Timer timer(cout, "  Time to join reference sequences: ", _verbose);
				joinToDisk(is, szs, sztot, refparams, s, out1, out2);
				szsToDisk(szs, out1, refparams.reverse);
			}
			// Joined reference sequence now in 's'
		} catch(bad_alloc& e) {
			// If we throw an allocation exception in the try block,
			// that means that the joined version of the reference
			// string itself is too larger to fit in memory.  The only
			// alternatives are to tell the user to give us more memory
			// or to try again with a packed representation of the
			// reference (if we haven't tried that already).
			cerr << "Could not allocate space for a joined string of " << jlen << " elements." << endl;
			if(!isPacked() && _passMemExc) {
				// Pass the exception up so that we can retry using a
				// packed string representation
				throw e;
			}
			// There's no point passing this exception on.  The fact
			// that we couldn't allocate the joined string means that
			// --bmax is irrelevant - the user should re-run with
			// ebwt-build-packed
			if(isPacked()) {
				cerr << "Please try running bowtie-build on a computer with more memory." << endl;
			} else {
				cerr << "Please try running bowtie-build in packed mode (-p/--packed) or in automatic" << endl
				     << "mode (-a/--auto), or try again on a computer with more memory." << endl;
			}
			if(sizeof(void*) == 4) {
				cerr << "If this computer has more than 4 GB of memory, try using a 64-bit executable;" << endl
				     << "this executable is 32-bit." << endl;
			}
			throw 1;
		}
		// Succesfully obtained joined reference string
		assert_geq(s.length(), jlen);
		if(bmax != OFF_MASK) {
			VMSG_NL("bmax according to bmax setting: " << bmax);
		}
		else if(bmaxSqrtMult != OFF_MASK) {
			bmax *= bmaxSqrtMult;
			VMSG_NL("bmax according to bmaxSqrtMult setting: " << bmax);
		}
		else if(bmaxDivN != OFF_MASK) {
			bmax = max<TIndexOffU>(jlen / bmaxDivN, 1);
			VMSG_NL("bmax according to bmaxDivN setting: " << bmax);
		}
		else {
			bmax = (TIndexOffU)sqrt(s.length());
			VMSG_NL("bmax defaulted to: " << bmax);
		}
		int iter = 0;
		bool first = true;
		streampos out1pos = out1.tellp();
		streampos out2pos = out2.tellp();
		// Look for bmax/dcv parameters that work.
		while(true) {
			if(!first && bmax < 40 && _passMemExc) {
				cerr << "Could not find approrpiate bmax/dcv settings for building this index." << endl;
				if(!isPacked()) {
					// Throw an exception exception so that we can
					// retry using a packed string representation
					throw bad_alloc();
				} else {
					cerr << "Already tried a packed string representation." << endl;
				}
				cerr << "Please try indexing this reference on a computer with more memory." << endl;
				if(sizeof(void*) == 4) {
					cerr << "If this computer has more than 4 GB of memory, try using a 64-bit executable;" << endl
						 << "this executable is 32-bit." << endl;
				}
				throw 1;
			}
			if(!first) {
				out1.seekp(out1pos);
				out2.seekp(out2pos);
			}
			if(dcv > 4096) dcv = 4096;
			if((iter % 6) == 5 && dcv < 4096 && dcv != 0) {
				dcv <<= 1; // double difference-cover period
			} else {
				bmax -= (bmax >> 2); // reduce by 25%
			}
			VMSG("Using parameters --bmax " << bmax);
			if(dcv == 0) {
				VMSG_NL(" and *no difference cover*");
			} else {
				VMSG_NL(" --dcv " << dcv);
			}
			iter++;
			try {
				{
					VMSG_NL("  Doing ahead-of-time memory usage test");
					// Make a quick-and-dirty attempt to force a bad_alloc iff
					// we would have thrown one eventually as part of
					// constructing the DifferenceCoverSample
					dcv <<= 1;
					TIndexOffU sz = (TIndexOffU)DifferenceCoverSample<TStr>::simulateAllocs(s, dcv >> 1);
                    if(nthreads > 1) sz *= (nthreads + 1);
					AutoArray<uint8_t> tmp(sz, EBWT_CAT);
					dcv >>= 1;
					// Likewise with the KarkkainenBlockwiseSA
					sz = (TIndexOffU)KarkkainenBlockwiseSA<TStr>::simulateAllocs(s, bmax);
					AutoArray<uint8_t> tmp2(sz, EBWT_CAT);
					// Now throw in the 'ftab' and 'isaSample' structures
					// that we'll eventually allocate in buildToDisk
					AutoArray<TIndexOffU> ftab(_eh._ftabLen * 2, EBWT_CAT);
					AutoArray<uint8_t> side(_eh._sideSz, EBWT_CAT);
					// Grab another 20 MB out of caution
					AutoArray<uint32_t> extra(20*1024*1024, EBWT_CAT);
					// If we made it here without throwing bad_alloc, then we
					// passed the memory-usage stress test
					VMSG("  Passed!  Constructing with these parameters: --bmax " << bmax << " --dcv " << dcv);
					if(isPacked()) {
						VMSG(" --packed");
					}
					VMSG_NL("");
				}
				VMSG_NL("Constructing suffix-array element generator");
				KarkkainenBlockwiseSA<TStr> bsa(s, bmax, nthreads, dcv, seed, _sanity, _passMemExc, _verbose, outfile);
				assert(bsa.suffixItrIsReset());
				assert_eq(bsa.size(), s.length()+1);
				VMSG_NL("Converting suffix-array elements to index image");
				buildToDisk(bsa, s, out1, out2, saOut, bwtOut);
				out1.flush(); out2.flush();
				bool failed = out1.fail() || out2.fail();
				if(saOut != NULL) {
					saOut->flush();
					failed = failed || saOut->fail();
				}
				if(bwtOut != NULL) {
					bwtOut->flush();
					failed = failed || bwtOut->fail();
				}
				if(failed) {
					cerr << "An error occurred writing the index to disk.  Please check if the disk is full." << endl;
					throw 1;
				}
				break;
			} catch(bad_alloc& e) {
				if(_passMemExc) {
					VMSG_NL("  Ran out of memory; automatically trying more memory-economical parameters.");
				} else {
					cerr << "Out of memory while constructing suffix array.  Please try using a smaller" << endl
						 << "number of blocks by specifying a smaller --bmax or a larger --bmaxdivn" << endl;
					throw 1;
				}
			}
			first = false;
		}
		assert(repOk());
		// Now write reference sequence names on the end
		assert_eq(this->_refnames.size(), this->_nPat);
		for(TIndexOffU i = 0; i < this->_refnames.size(); i++) {
			out1 << this->_refnames[i].c_str() << endl;
		}
		out1 << '\0';
		out1.flush(); out2.flush();
		if(out1.fail() || out2.fail()) {
			cerr << "An error occurred writing the index to disk.  Please check if the disk is full." << endl;
			throw 1;
		}
		VMSG_NL("Returning from initFromVector");
	}
	
	/**
	 * Return the length that the joined string of the given string
	 * list will have.  Note that this is indifferent to how the text
	 * fragments correspond to input sequences - it just cares about
	 * the lengths of the fragments.
	 */
	TIndexOffU joinedLen(EList<RefRecord>& szs) {
		TIndexOffU ret = 0;
		for(unsigned int i = 0; i < szs.size(); i++) {
			ret += szs[i].len;
		}
		return ret;
	}

	/// Destruct an Ebwt
	~Ebwt() {
		_fchr.reset();
		_ftab.reset();
		_eftab.reset();
		_plen.reset();
		_rstarts.reset();
		_offs.reset();
		_ebwt.reset();
		if(offs() != NULL && useShmem_) {
			FREE_SHARED(offs());
		}
		if(ebwt() != NULL && useShmem_) {
			FREE_SHARED(ebwt());
		}
		if (_in1 != NULL) fclose(_in1);
		if (_in2 != NULL) fclose(_in2);
	}

	/// Accessors
	inline const EbwtParams& eh() const     { return _eh; }
	TIndexOffU    zOff() const         { return _zOff; }
	TIndexOffU    zEbwtByteOff() const { return _zEbwtByteOff; }
	TIndexOff         zEbwtBpOff() const   { return _zEbwtBpOff; } 
	TIndexOffU    nPat() const         { return _nPat; }
	TIndexOffU    nFrag() const        { return _nFrag; }
	inline TIndexOffU*   fchr()              { return _fchr.get(); }
	inline TIndexOffU*   ftab()              { return _ftab.get(); }
	inline TIndexOffU*   eftab()             { return _eftab.get(); }
	inline TIndexOffU*   offs()              { return _offs.get(); }
	inline TIndexOffU*   plen()              { return _plen.get(); }
	inline TIndexOffU*   rstarts()           { return _rstarts.get(); }
	inline uint8_t*    ebwt()              { return _ebwt.get(); }
	inline const TIndexOffU* fchr() const    { return _fchr.get(); }
	inline const TIndexOffU* ftab() const    { return _ftab.get(); }
	inline const TIndexOffU* eftab() const   { return _eftab.get(); }
	inline const TIndexOffU* offs() const    { return _offs.get(); }
	inline const TIndexOffU* plen() const    { return _plen.get(); }
	inline const TIndexOffU* rstarts() const { return _rstarts.get(); }
	inline const uint8_t*  ebwt() const    { return _ebwt.get(); }
	bool        toBe() const         { return _toBigEndian; }
	bool        verbose() const      { return _verbose; }
	bool        sanityCheck() const  { return _sanity; }
	EList<string>& refnames()        { return _refnames; }
	bool        fw() const           { return fw_; }
#ifdef POPCNT_CAPABILITY 
    bool _usePOPCNTinstruction; 
#endif 

	/**
	 * Returns true iff the index contains the given string (exactly).  The
	 * given string must contain only unambiguous characters.  TODO:
	 * support skipping of ambiguous characters.
	 */
	bool contains(
		const BTDnaString& str,
		TIndexOffU *top = NULL,
		TIndexOffU *bot = NULL) const;

	/**
	 * Returns true iff the index contains the given string (exactly).  The
	 * given string must contain only unambiguous characters.  TODO:
	 * support skipping of ambiguous characters.
	 */
	bool contains(
		const char *str,
		TIndexOffU *top = NULL,
		TIndexOffU *bot = NULL) const
	{
		return contains(BTDnaString(str, true), top, bot);
	}
	
	/// Return true iff the Ebwt is currently in memory
	bool isInMemory() const {
		if(ebwt() != NULL) {
			// Note: We might have skipped loading _offs, _ftab,
			// _eftab, and _rstarts depending on whether this is the
			// reverse index and what algorithm is being used.
			assert(_eh.repOk());
			//assert(_ftab != NULL);
			//assert(_eftab != NULL);
			assert(fchr() != NULL);
			//assert(_offs != NULL);
			//assert(_rstarts != NULL);
			assert_neq(_zEbwtByteOff, OFF_MASK);
			assert_neq(_zEbwtBpOff, -1);
			return true;
		} else {
			assert(ftab() == NULL);
			assert(eftab() == NULL);
			assert(fchr() == NULL);
			assert(offs() == NULL);
			assert(rstarts() == NULL);
			assert_eq(_zEbwtByteOff, OFF_MASK);
			assert_eq(_zEbwtBpOff, -1);
			return false;
		}
	}

	/// Return true iff the Ebwt is currently stored on disk
	bool isEvicted() const {
		return !isInMemory();
	}

	/**
	 * Load this Ebwt into memory by reading it in from the _in1 and
	 * _in2 streams.
	 */
	void loadIntoMemory(
		int color,
		int needEntireReverse,
		bool loadSASamp,
		bool loadFtab,
		bool loadRstarts,
		bool loadNames,
		bool verbose)
	{
		readIntoMemory(
			color,       // expect index to be colorspace?
			needEntireReverse, // require reverse index to be concatenated reference reversed
			loadSASamp,  // load the SA sample portion?
			loadFtab,    // load the ftab (_ftab[] and _eftab[])?
			loadRstarts, // load the r-starts (_rstarts[])?
			false,       // stop after loading the header portion?
			NULL,        // params
			false,       // mmSweep
			loadNames,   // loadNames
			verbose);    // startVerbose
	}

	/**
	 * Frees memory associated with the Ebwt.
	 */
	void evictFromMemory() {
		assert(isInMemory());
		_fchr.free();
		_ftab.free();
		_eftab.free();
		_rstarts.free();
		_offs.free(); // might not be under control of APtrWrap
		_ebwt.free(); // might not be under control of APtrWrap
		// Keep plen; it's small and the client may want to seq it
		// even when the others are evicted.
		//_plen  = NULL;
		_zEbwtByteOff = OFF_MASK;
		_zEbwtBpOff = -1;
	}

	/**
	 * Turn a substring of 'seq' starting at offset 'off' and having
	 * length equal to the index's 'ftabChars' into an int that can be
	 * used to index into the ftab array.
	 */
	TIndexOffU ftabSeqToInt(
		const BTDnaString& seq,
		size_t off,
		bool rev) const
	{
		int fc = _eh._ftabChars;
		size_t lo = off, hi = lo + fc;
		assert_leq(hi, seq.length());
		TIndexOffU ftabOff = 0;
		for(int i = 0; i < fc; i++) {
			bool fwex = fw();
			if(rev) fwex = !fwex;
			// We add characters to the ftabOff in the order they would
			// have been consumed in a normal search.  For BWT, this
			// means right-to-left order; for BWT' it's left-to-right.
			int c = (fwex ? seq[lo + i] : seq[hi - i - 1]);
			if(c > 3) {
				return std::numeric_limits<TIndexOffU>::max();
			}
			assert_range(0, 3, c);
			ftabOff <<= 2;
			ftabOff |= c;
		}
		return ftabOff;
	}
	
	/**
	 * Non-static facade for static function ftabHi.
	 */
	TIndexOffU ftabHi(TIndexOffU i) const {
		return Ebwt::ftabHi(
			ftab(),
			eftab(),
			_eh._len,
			_eh._ftabLen,
			_eh._eftabLen,
			i);
	}

	/**
	 * Get "high interpretation" of ftab entry at index i.  The high
	 * interpretation of a regular ftab entry is just the entry
	 * itself.  The high interpretation of an extended entry is the
	 * second correpsonding ui32 in the eftab.
	 *
	 * It's a static member because it's convenient to ask this
	 * question before the Ebwt is fully initialized.
	 */
	static TIndexOffU ftabHi(
		const TIndexOffU *ftab,
		const TIndexOffU *eftab,
		TIndexOffU len,
		TIndexOffU ftabLen,
		TIndexOffU eftabLen,
		TIndexOffU i)
	{
		assert_lt(i, ftabLen);
		if(ftab[i] <= len) {
			return ftab[i];
		} else {
			TIndexOffU efIdx = ftab[i] ^ OFF_MASK;
			assert_lt(efIdx*2+1, eftabLen);
			return eftab[efIdx*2+1];
		}
	}

	/**
	 * Non-static facade for static function ftabLo.
	 */
	TIndexOffU ftabLo(TIndexOffU i) const {
		return Ebwt::ftabLo(
			ftab(),
			eftab(),
			_eh._len,
			_eh._ftabLen,
			_eh._eftabLen,
			i);
	}
	
	/**
	 * Get low bound of ftab range.
	 */
	TIndexOffU ftabLo(const BTDnaString& seq, size_t off) const {
		return ftabLo(ftabSeqToInt(seq, off, false));
	}

	/**
	 * Get high bound of ftab range.
	 */
	TIndexOffU ftabHi(const BTDnaString& seq, size_t off) const {
		return ftabHi(ftabSeqToInt(seq, off, false));
	}
	
	/**
	 * Extract characters from seq starting at offset 'off' and going either
	 * forward or backward, depending on 'rev'.  Order matters when compiling
	 * the integer that gets looked up in the ftab.  Each successive character
	 * is ORed into the least significant bit-pair, and characters are
	 * integrated in the direction of the search.
	 */
	bool
	ftabLoHi(
		const BTDnaString& seq, // sequence to extract from
		size_t off,             // offset into seq to begin extracting
		bool rev,               // reverse while extracting
		TIndexOffU& top,
		TIndexOffU& bot) const
	{
		TIndexOffU fi = ftabSeqToInt(seq, off, rev);
		if(fi == std::numeric_limits<TIndexOffU>::max()) {
			return false;
		}
		top = ftabHi(fi);
		bot = ftabLo(fi+1);
		assert_geq(bot, top);
		return true;
	}
	
	/**
	 * Get "low interpretation" of ftab entry at index i.  The low
	 * interpretation of a regular ftab entry is just the entry
	 * itself.  The low interpretation of an extended entry is the
	 * first correpsonding ui32 in the eftab.
	 *
	 * It's a static member because it's convenient to ask this
	 * question before the Ebwt is fully initialized.
	 */
	static TIndexOffU ftabLo(
		const TIndexOffU *ftab,
		const TIndexOffU *eftab,
		TIndexOffU len,
		TIndexOffU ftabLen,
		TIndexOffU eftabLen,
		TIndexOffU i)
	{
		assert_lt(i, ftabLen);
		if(ftab[i] <= len) {
			return ftab[i];
		} else {
			TIndexOffU efIdx = ftab[i] ^ OFF_MASK;
			assert_lt(efIdx*2+1, eftabLen);
			return eftab[efIdx*2];
		}
	}

	/**
	 * Try to resolve the reference offset of the BW element 'elt'.  If
	 * it can be resolved immediately, return the reference offset.  If
	 * it cannot be resolved immediately, return max value.
	 */
	TIndexOffU tryOffset(TIndexOffU elt) const {
		assert(offs() != NULL);
		if(elt == _zOff) return 0;
		if((elt & _eh._offMask) == elt) {
			TIndexOffU eltOff = elt >> _eh._offRate;
			assert_lt(eltOff, _eh._offsLen);
			TIndexOffU off = offs()[eltOff];
			assert_neq(OFF_MASK, off);
			return off;
		} else {
			// Try looking at zoff
			return OFF_MASK;
		}
	}

	/**
	 * Try to resolve the reference offset of the BW element 'elt' such
	 * that the offset returned is at the right-hand side of the
	 * forward reference substring involved in the hit.
	 */
	TIndexOffU tryOffset(
		TIndexOffU elt,
		bool fw,
		TIndexOffU hitlen) const
	{
		TIndexOffU off = tryOffset(elt);
		if(off != OFF_MASK && !fw) {
			assert_lt(off, _eh._len);
			off = _eh._len - off - 1;
			assert_geq(off, hitlen-1);
			off -= (hitlen-1);
			assert_lt(off, _eh._len);
		}
		return off;
	}

	/**
	 * Walk 'steps' steps to the left and return the row arrived at.
	 */
	TIndexOffU walkLeft(TIndexOffU row, TIndexOffU steps) const;

	/**
	 * Resolve the reference offset of the BW element 'elt'.
	 */
	TIndexOffU getOffset(TIndexOffU row) const;

	/**
	 * Resolve the reference offset of the BW element 'elt' such that
	 * the offset returned is at the right-hand side of the forward
	 * reference substring involved in the hit.
	 */
	TIndexOffU getOffset(
		TIndexOffU elt,
		bool fw,
		TIndexOffU hitlen) const;

	/**
	 * When using read() to create an Ebwt, we have to set a couple of
	 * additional fields in the Ebwt object that aren't part of the
	 * parameter list and are not stored explicitly in the file.  Right
	 * now, this just involves initializing _zEbwtByteOff and
	 * _zEbwtBpOff from _zOff.
	 */
	void postReadInit(EbwtParams& eh) {
		TIndexOffU sideNum     = _zOff / eh._sideBwtLen;
		TIndexOffU sideCharOff = _zOff % eh._sideBwtLen;
		TIndexOffU sideByteOff = sideNum * eh._sideSz;
		_zEbwtByteOff = sideCharOff >> 2;
		assert_lt(_zEbwtByteOff, eh._sideBwtSz);
		_zEbwtBpOff = sideCharOff & 3;
		assert_lt(_zEbwtBpOff, 4);
		_zEbwtByteOff += sideByteOff;
		assert(repOk(eh)); // Ebwt should be fully initialized now
	}

	/**
	 * Given basename of an Ebwt index, read and return its flag.
	 */
	static int32_t readFlags(const string& instr);

	/**
	 * Pretty-print the Ebwt to the given output stream.
	 */
	void print(ostream& out) const {
		print(out, _eh);
	}
	
	/**
	 * Pretty-print the Ebwt and given EbwtParams to the given output
	 * stream.
	 */
	void print(ostream& out, const EbwtParams& eh) const {
		eh.print(out); // print params
		out << "Ebwt (" << (isInMemory()? "memory" : "disk") << "):" << endl
		    << "    zOff: "         << _zOff << endl
		    << "    zEbwtByteOff: " << _zEbwtByteOff << endl
		    << "    zEbwtBpOff: "   << _zEbwtBpOff << endl
		    << "    nPat: "  << _nPat << endl
		    << "    plen: ";
		if(plen() == NULL) {
			out << "NULL" << endl;
		} else {
			out << "non-NULL, [0] = " << plen()[0] << endl;
		}
		out << "    rstarts: ";
		if(rstarts() == NULL) {
			out << "NULL" << endl;
		} else {
			out << "non-NULL, [0] = " << rstarts()[0] << endl;
		}
		out << "    ebwt: ";
		if(ebwt() == NULL) {
			out << "NULL" << endl;
		} else {
			out << "non-NULL, [0] = " << ebwt()[0] << endl;
		}
		out << "    fchr: ";
		if(fchr() == NULL) {
			out << "NULL" << endl;
		} else {
			out << "non-NULL, [0] = " << fchr()[0] << endl;
		}
		out << "    ftab: ";
		if(ftab() == NULL) {
			out << "NULL" << endl;
		} else {
			out << "non-NULL, [0] = " << ftab()[0] << endl;
		}
		out << "    eftab: ";
		if(eftab() == NULL) {
			out << "NULL" << endl;
		} else {
			out << "non-NULL, [0] = " << eftab()[0] << endl;
		}
		out << "    offs: ";
		if(offs() == NULL) {
			out << "NULL" << endl;
		} else {
			out << "non-NULL, [0] = " << offs()[0] << endl;
		}
	}

	// Building
	template <typename TStr> static TStr join(EList<TStr>& l, uint32_t seed);
	template <typename TStr> static TStr join(EList<FileBuf*>& l, EList<RefRecord>& szs, TIndexOffU sztot, const RefReadInParams& refparams, uint32_t seed);
	template <typename TStr> void joinToDisk(EList<FileBuf*>& l, EList<RefRecord>& szs, TIndexOffU sztot, const RefReadInParams& refparams, TStr& ret, ostream& out1, ostream& out2);
	template <typename TStr> void buildToDisk(InorderBlockwiseSA<TStr>& sa, const TStr& s, ostream& out1, ostream& out2, ostream* saOut, ostream* bwtOut);

	// I/O
	void readIntoMemory(int color, int needEntireRev, bool loadSASamp, bool loadFtab, bool loadRstarts, bool justHeader, EbwtParams *params, bool mmSweep, bool loadNames, bool startVerbose);
	void writeFromMemory(bool justHeader, ostream& out1, ostream& out2) const;
	void writeFromMemory(bool justHeader, const string& out1, const string& out2) const;

	// Sanity checking
	void sanityCheckUpToSide(TIndexOff upToSide) const;
	void sanityCheckAll(int reverse) const;
	void restore(SString<char>& s) const;
	void checkOrigs(const EList<SString<char> >& os, bool color, bool mirror) const;

	// Searching and reporting
	void joinedToTextOff(TIndexOffU qlen, TIndexOffU off, TIndexOffU& tidx, TIndexOffU& textoff, TIndexOffU& tlen, bool rejectStraddle, bool& straddled) const;

#define WITHIN_BWT_LEN(x) \
	assert_leq(x[0], this->_eh._sideBwtLen); \
	assert_leq(x[1], this->_eh._sideBwtLen); \
	assert_leq(x[2], this->_eh._sideBwtLen); \
	assert_leq(x[3], this->_eh._sideBwtLen)

#define WITHIN_FCHR(x) \
	assert_leq(x[0], this->fchr()[1]); \
	assert_leq(x[1], this->fchr()[2]); \
	assert_leq(x[2], this->fchr()[3]); \
	assert_leq(x[3], this->fchr()[4])

#define WITHIN_FCHR_DOLLARA(x) \
	assert_leq(x[0], this->fchr()[1]+1); \
	assert_leq(x[1], this->fchr()[2]); \
	assert_leq(x[2], this->fchr()[3]); \
	assert_leq(x[3], this->fchr()[4])

	/**
	 * Count all occurrences of character c from the beginning of the
	 * forward side to <by,bp> and add in the occ[] count up to the side
	 * break just prior to the side.
	 *
	 * A Bowtie 2 side is shaped like:
	 *
	 * XXXXXXXXXXXXXXXX [A] [C] [G] [T]
	 * --------48------ -4- -4- -4- -4-  (numbers in bytes)
	 */
	inline TIndexOffU countBt2Side(const SideLocus& l, int c) const {
		assert_range(0, 3, c);
		assert_range(0, (int)this->_eh._sideBwtSz-1, (int)l._by);
		assert_range(0, 3, (int)l._bp);
		const uint8_t *side = l.side(this->ebwt());
		TIndexOffU cCnt = countUpTo(l, c);
		assert_leq(cCnt, l.toBWRow());
		assert_leq(cCnt, this->_eh._sideBwtLen);
		if(c == 0 && l._sideByteOff <= _zEbwtByteOff && l._sideByteOff + l._by >= _zEbwtByteOff) {
			// Adjust for the fact that we represented $ with an 'A', but
			// shouldn't count it as an 'A' here
			if((l._sideByteOff + l._by > _zEbwtByteOff) ||
			   (l._sideByteOff + l._by == _zEbwtByteOff && l._bp > _zEbwtBpOff))
			{
				cCnt--; // Adjust for '$' looking like an 'A'
			}
		}
		TIndexOffU ret;
		// Now factor in the occ[] count at the side break
		const uint8_t *acgt8 = side + _eh._sideBwtSz;
		const TIndexOffU *acgt = reinterpret_cast<const TIndexOffU*>(acgt8);
		assert_leq(acgt[0], this->_eh._numSides * this->_eh._sideBwtLen); // b/c it's used as padding
		assert_leq(acgt[1], this->_eh._len);
		assert_leq(acgt[2], this->_eh._len);
		assert_leq(acgt[3], this->_eh._len);
		ret = acgt[c] + cCnt + this->fchr()[c];
	#ifndef NDEBUG
		assert_leq(ret, this->fchr()[c+1]); // can't have jumpded into next char's section
		if(c == 0) {
			assert_leq(cCnt, this->_eh._sideBwtLen);
		} else {
			assert_leq(ret, this->_eh._bwtLen);
		}
	#endif
		return ret;
	}

	/**
	 * Count all occurrences of all four nucleotides up to the starting
	 * point (which must be in a forward side) given by 'l' storing the
	 * result in 'cntsUpto', then count nucleotide occurrences within the
	 * range of length 'num' storing the result in 'cntsIn'.  Also, keep
	 * track of the characters occurring within the range by setting
	 * 'masks' accordingly (masks[1][10] == true -> 11th character is a
	 * 'C', and masks[0][10] == masks[2][10] == masks[3][10] == false.
	 */
	inline void countBt2SideRange(
		SideLocus& l,        // top locus
		TIndexOffU num,        // number of elts in range to tall // @double-check
		TIndexOffU* cntsUpto, // A/C/G/T counts up to top
		TIndexOffU* cntsIn,   // A/C/G/T counts within range
		EList<bool> *masks) const // masks indicating which range elts = A/C/G/T
	{
		assert_gt(num, 0);
		assert_range(0, (int)this->_eh._sideBwtSz-1, (int)l._by);
		assert_range(0, 3, (int)l._bp);
		countUpToEx(l, cntsUpto);
		WITHIN_FCHR_DOLLARA(cntsUpto);
		WITHIN_BWT_LEN(cntsUpto);
		const uint8_t *side = l.side(this->ebwt());
		if(l._sideByteOff <= _zEbwtByteOff && l._sideByteOff + l._by >= _zEbwtByteOff) {
			// Adjust for the fact that we represented $ with an 'A', but
			// shouldn't count it as an 'A' here
			if((l._sideByteOff + l._by > _zEbwtByteOff) ||
			   (l._sideByteOff + l._by == _zEbwtByteOff && l._bp > _zEbwtBpOff))
			{
				cntsUpto[0]--; // Adjust for '$' looking like an 'A'
			}
		}
		// Now factor in the occ[] count at the side break
		const TIndexOffU *acgt = reinterpret_cast<const TIndexOffU*>(side + _eh._sideBwtSz);
		assert_leq(acgt[0], this->fchr()[1] + this->_eh.sideBwtLen());
		assert_leq(acgt[1], this->fchr()[2]-this->fchr()[1]);
		assert_leq(acgt[2], this->fchr()[3]-this->fchr()[2]);
		assert_leq(acgt[3], this->fchr()[4]-this->fchr()[3]);
		assert_leq(acgt[0], this->_eh._len + this->_eh.sideBwtLen());
		assert_leq(acgt[1], this->_eh._len);
		assert_leq(acgt[2], this->_eh._len);
		assert_leq(acgt[3], this->_eh._len);
		cntsUpto[0] += (acgt[0] + this->fchr()[0]);
		cntsUpto[1] += (acgt[1] + this->fchr()[1]);
		cntsUpto[2] += (acgt[2] + this->fchr()[2]);
		cntsUpto[3] += (acgt[3] + this->fchr()[3]);
		masks[0].resize(num);
		masks[1].resize(num);
		masks[2].resize(num);
		masks[3].resize(num);
		WITHIN_FCHR_DOLLARA(cntsUpto);
		WITHIN_FCHR_DOLLARA(cntsIn);
		// 'cntsUpto' is complete now.
		// Walk forward until we've tallied the entire 'In' range
		TIndexOffU nm = 0;
		// Rest of this side
		nm += countBt2SideRange2(l, true, num - nm, cntsIn, masks, nm);
		assert_eq(nm, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]);
		assert_leq(nm, num);
		SideLocus lcopy = l;
		while(nm < num) {
			// Subsequent sides, if necessary
			lcopy.nextSide(this->_eh);
			nm += countBt2SideRange2(lcopy, false, num - nm, cntsIn, masks, nm);
			WITHIN_FCHR_DOLLARA(cntsIn);
			assert_leq(nm, num);
			assert_eq(nm, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]);
		}
		assert_eq(num, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]);
		WITHIN_FCHR_DOLLARA(cntsIn);
	}

	/**
	 * Count all occurrences of character c from the beginning of the
	 * forward side to <by,bp> and add in the occ[] count up to the side
	 * break just prior to the side.
	 *
	 * A forward side is shaped like:
	 *
	 * [A] [C] XXXXXXXXXXXXXXXX
	 * -4- -4- --------56------ (numbers in bytes)
	 *         ^
	 *         Side ptr (result from SideLocus.side())
	 *
	 * And following it is a reverse side shaped like:
	 * 
	 * [G] [T] XXXXXXXXXXXXXXXX
	 * -4- -4- --------56------ (numbers in bytes)
	 *         ^
	 *         Side ptr (result from SideLocus.side())
	 *
	 */
	inline void countBt2SideEx(const SideLocus& l, TIndexOffU* arrs) const {
		assert_range(0, (int)this->_eh._sideBwtSz-1, (int)l._by);
		assert_range(0, 3, (int)l._bp);
		countUpToEx(l, arrs);
		if(l._sideByteOff <= _zEbwtByteOff && l._sideByteOff + l._by >= _zEbwtByteOff) {
			// Adjust for the fact that we represented $ with an 'A', but
			// shouldn't count it as an 'A' here
			if((l._sideByteOff + l._by > _zEbwtByteOff) ||
			   (l._sideByteOff + l._by == _zEbwtByteOff && l._bp > _zEbwtBpOff))
			{
				arrs[0]--; // Adjust for '$' looking like an 'A'
			}
		}
		WITHIN_FCHR(arrs);
		WITHIN_BWT_LEN(arrs);
		// Now factor in the occ[] count at the side break
		const uint8_t *side = l.side(this->ebwt());
		const uint8_t *acgt16 = side + this->_eh._sideSz - OFF_SIZE*4;
		const TIndexOffU *acgt = reinterpret_cast<const TIndexOffU*>(acgt16);
		assert_leq(acgt[0], this->fchr()[1] + this->_eh.sideBwtLen());
		assert_leq(acgt[1], this->fchr()[2]-this->fchr()[1]);
		assert_leq(acgt[2], this->fchr()[3]-this->fchr()[2]);
		assert_leq(acgt[3], this->fchr()[4]-this->fchr()[3]);
		assert_leq(acgt[0], this->_eh._len + this->_eh.sideBwtLen());
		assert_leq(acgt[1], this->_eh._len);
		assert_leq(acgt[2], this->_eh._len);
		assert_leq(acgt[3], this->_eh._len);
		arrs[0] += (acgt[0] + this->fchr()[0]);
		arrs[1] += (acgt[1] + this->fchr()[1]);
		arrs[2] += (acgt[2] + this->fchr()[2]);
		arrs[3] += (acgt[3] + this->fchr()[3]);
		WITHIN_FCHR(arrs);
	}

	/**
	 * Counts the number of occurrences of character 'c' in the given Ebwt
	 * side up to (but not including) the given byte/bitpair (by/bp).
	 *
	 * This is a performance-critical function.  This is the top search-
	 * related hit in the time profile.
	 *
	 * Function gets 11.09% in profile
	 */
	inline TIndexOffU countUpTo(const SideLocus& l, int c) const { // @double-check
		// Count occurrences of c in each 64-bit (using bit trickery);
		// Someday countInU64() and pop() functions should be
		// vectorized/SSE-ized in case that helps.
		TIndexOffU cCnt = 0;
		const uint8_t *side = l.side(this->ebwt());
		int i = 0;
#ifdef POPCNT_CAPABILITY
        if ( _usePOPCNTinstruction) {
            for(; i + 7 < l._by; i += 8) {
                cCnt += countInU64<USE_POPCNT_INSTRUCTION>(c, *(uint64_t*)&side[i]);
            }
        } 
        else {
            for(; i + 7 < l._by; i += 8) {
                cCnt += countInU64<USE_POPCNT_GENERIC>(c, *(uint64_t*)&side[i]);
            }
        }
#else
        for(; i + 7 < l._by; i += 8) {
            cCnt += countInU64(c, *(uint64_t*)&side[i]);
        }
#endif		
		// Count occurences of c in the rest of the side (using LUT)
		for(; i < l._by; i++) {
			cCnt += cCntLUT_4[0][c][side[i]];
		}
		// Count occurences of c in the rest of the byte
		if(l._bp > 0) {
			cCnt += cCntLUT_4[(int)l._bp][c][side[i]];
		}
		return cCnt;
	}

	/**
	 * Tricky-bit-bashing bitpair counting for given two-bit value (0-3)
	 * within a 64-bit argument.
	 *
	 * Function gets 2.32% in profile
	 */
#ifdef POPCNT_CAPABILITY
template<typename Operation>
#endif
	inline static void countInU64Ex(uint64_t dw, TIndexOffU* arrs) {
		uint64_t c0 = c_table[0];
		uint64_t x0 = dw ^ c0;
		uint64_t x1 = (x0 >> 1);
		uint64_t x2 = x1 & (0x5555555555555555llu);
		uint64_t x3 = x0 & x2;
#ifdef POPCNT_CAPABILITY
		uint64_t tmp = Operation().pop64(x3);
#else
		uint64_t tmp = pop64(x3);
#endif
		arrs[0] += (uint32_t) tmp;

		c0 = c_table[1];
		x0 = dw ^ c0;
		x1 = (x0 >> 1);
		x2 = x1 & (0x5555555555555555llu);
		x3 = x0 & x2;
#ifdef POPCNT_CAPABILITY
		tmp = Operation().pop64(x3);
#else
		tmp = pop64(x3);
#endif
		arrs[1] += (uint32_t) tmp;

		c0 = c_table[2];
		x0 = dw ^ c0;
		x1 = (x0 >> 1);
		x2 = x1 & (0x5555555555555555llu);
		x3 = x0 & x2;
#ifdef POPCNT_CAPABILITY
		tmp = Operation().pop64(x3);
#else
		tmp = pop64(x3);
#endif
		arrs[2] += (uint32_t) tmp;
	
		c0 = c_table[3];
		x0 = dw ^ c0;
		x1 = (x0 >> 1);
		x2 = x1 & (0x5555555555555555llu);
		x3 = x0 & x2;
#ifdef POPCNT_CAPABILITY
		tmp = Operation().pop64(x3);
#else
		tmp = pop64(x3);
#endif
		arrs[3] += (uint32_t) tmp;
	}

	/**
	 * Counts the number of occurrences of all four nucleotides in the
	 * given side up to (but not including) the given byte/bitpair (by/bp).
	 * Count for 'a' goes in arrs[0], 'c' in arrs[1], etc.
	 */
	inline void countUpToEx(const SideLocus& l, TIndexOffU* arrs) const {
		int i = 0;
		// Count occurrences of each nucleotide in each 64-bit word using
		// bit trickery; note: this seems does not seem to lend a
		// significant boost to performance in practice.  If you comment
		// out this whole loop (which won't affect correctness - it will
		// just cause the following loop to take up the slack) then runtime
		// does not change noticeably. Someday the countInU64() and pop()
		// functions should be vectorized/SSE-ized in case that helps.
		const uint8_t *side = l.side(this->ebwt());

#ifdef POPCNT_CAPABILITY
		if (_usePOPCNTinstruction) {
			for(; i+7 < l._by; i += 8) {
				countInU64Ex<USE_POPCNT_INSTRUCTION>(*(uint64_t*)&side[i], arrs);
			}
		}
		else {
			for(; i+7 < l._by; i += 8) {
				countInU64Ex<USE_POPCNT_GENERIC>(*(uint64_t*)&side[i], arrs);
			}
		}
#else 
		for(; i+7 < l._by; i += 8) {
			countInU64Ex(*(uint64_t*)&side[i], arrs);
		}
#endif
		// Count occurences of nucleotides in the rest of the side (using LUT)
		// Many cache misses on following lines (~20K)
		for(; i < l._by; i++) {
			arrs[0] += cCntLUT_4[0][0][side[i]];
			arrs[1] += cCntLUT_4[0][1][side[i]];
			arrs[2] += cCntLUT_4[0][2][side[i]];
			arrs[3] += cCntLUT_4[0][3][side[i]];
		}
		// Count occurences of c in the rest of the byte
		if(l._bp > 0) {
			arrs[0] += cCntLUT_4[(int)l._bp][0][side[i]];
			arrs[1] += cCntLUT_4[(int)l._bp][1][side[i]];
			arrs[2] += cCntLUT_4[(int)l._bp][2][side[i]];
			arrs[3] += cCntLUT_4[(int)l._bp][3][side[i]];
		}
	}

#ifndef NDEBUG
	/**
	 * Given top and bot loci, calculate counts of all four DNA chars up to
	 * those loci.  Used for more advanced backtracking-search.
	 */
	inline void mapLFEx(
		const SideLocus& l,
		TIndexOffU *arrs
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		assert_eq(0, arrs[0]);
		assert_eq(0, arrs[1]);
		assert_eq(0, arrs[2]);
		assert_eq(0, arrs[3]);
		countBt2SideEx(l, arrs);
		if(_sanity && !overrideSanity) {
			// Make sure results match up with individual calls to mapLF;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			assert_eq(mapLF(l, 0, true), arrs[0]);
			assert_eq(mapLF(l, 1, true), arrs[1]);
			assert_eq(mapLF(l, 2, true), arrs[2]);
			assert_eq(mapLF(l, 3, true), arrs[3]);
		}
	}
#endif

	/**
	 * Given top and bot rows, calculate counts of all four DNA chars up to
	 * those loci.
	 */
	inline void mapLFEx(
		TIndexOffU top,
		TIndexOffU bot,
		TIndexOffU *tops,
		TIndexOffU *bots
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		SideLocus ltop, lbot;
		SideLocus::initFromTopBot(top, bot, _eh, ebwt(), ltop, lbot);
		mapLFEx(ltop, lbot, tops, bots ASSERT_ONLY(, overrideSanity));
	}

	/**
	 * Given top and bot loci, calculate counts of all four DNA chars up to
	 * those loci.  Used for more advanced backtracking-search.
	 */
	inline void mapLFEx(
		const SideLocus& ltop,
		const SideLocus& lbot,
		TIndexOffU *tops,
		TIndexOffU *bots
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		assert(ltop.repOk(this->eh()));
		assert(lbot.repOk(this->eh()));
		assert_eq(0, tops[0]); assert_eq(0, bots[0]);
		assert_eq(0, tops[1]); assert_eq(0, bots[1]);
		assert_eq(0, tops[2]); assert_eq(0, bots[2]);
		assert_eq(0, tops[3]); assert_eq(0, bots[3]);
		countBt2SideEx(ltop, tops);
		countBt2SideEx(lbot, bots);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Make sure results match up with individual calls to mapLF;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			assert_eq(mapLF(ltop, 0, true), tops[0]);
			assert_eq(mapLF(ltop, 1, true), tops[1]);
			assert_eq(mapLF(ltop, 2, true), tops[2]);
			assert_eq(mapLF(ltop, 3, true), tops[3]);
			assert_eq(mapLF(lbot, 0, true), bots[0]);
			assert_eq(mapLF(lbot, 1, true), bots[1]);
			assert_eq(mapLF(lbot, 2, true), bots[2]);
			assert_eq(mapLF(lbot, 3, true), bots[3]);
		}
#endif
	}

	/**
	 * Counts the number of occurrences of all four nucleotides in the
	 * given side from the given byte/bitpair (l->_by/l->_bp) (or the
	 * beginning of the side if l == 0).  Count for 'a' goes in arrs[0],
	 * 'c' in arrs[1], etc.
	 *
	 * Note: must account for $.
	 *
	 * Must fill in masks
	 */
	inline TIndexOffU countBt2SideRange2( // @double-check
		const SideLocus& l,
		bool startAtLocus,
		TIndexOffU num,
		TIndexOffU* arrs,
		EList<bool> *masks,
		TIndexOffU maskOff) const
	{
		assert(!masks[0].empty());
		assert_eq(masks[0].size(), masks[1].size());
		assert_eq(masks[0].size(), masks[2].size());
		assert_eq(masks[0].size(), masks[3].size());
		ASSERT_ONLY(TIndexOffU myarrs[4] = {0, 0, 0, 0});
		TIndexOffU nm = 0; // number of nucleotides tallied so far
		int iby = 0;      // initial byte offset
		int ibp = 0;      // initial base-pair offset
		if(startAtLocus) {
			iby = l._by;
			ibp = l._bp;
		} else {
			// Start at beginning
		}
		int by = iby, bp = ibp;
		assert_lt(bp, 4);
		assert_lt(by, (int)this->_eh._sideBwtSz);
		const uint8_t *side = l.side(this->ebwt());
		while(nm < num) {
			int c = (side[by] >> (bp * 2)) & 3;
			assert_lt(maskOff + nm, masks[c].size());
			masks[0][maskOff + nm] = masks[1][maskOff + nm] =
			masks[2][maskOff + nm] = masks[3][maskOff + nm] = false;
			assert_range(0, 3, c);
			// Note: we tally $ just like an A
			arrs[c]++; // tally it
			ASSERT_ONLY(myarrs[c]++);
			masks[c][maskOff + nm] = true; // not dead
			nm++;
			if(++bp == 4) {
				bp = 0;
				by++;
				assert_leq(by, (int)this->_eh._sideBwtSz);
				if(by == (int)this->_eh._sideBwtSz) {
					// Fell off the end of the side
					break;
				}
			}
		}
		WITHIN_FCHR_DOLLARA(arrs);
#ifndef NDEBUG
		if(_sanity) {
			// Make sure results match up with a call to mapLFEx.
			TIndexOffU tops[4] = {0, 0, 0, 0};
			TIndexOffU bots[4] = {0, 0, 0, 0};
			TIndexOffU top = l.toBWRow();
			TIndexOffU bot = top + nm;
			mapLFEx(top, bot, tops, bots, false);
			assert(myarrs[0] == (bots[0] - tops[0]) || myarrs[0] == (bots[0] - tops[0])+1);
			assert_eq(myarrs[1], bots[1] - tops[1]);
			assert_eq(myarrs[2], bots[2] - tops[2]);
			assert_eq(myarrs[3], bots[3] - tops[3]);
		}
#endif
		return nm;
	}

	/**
	 * Return the final character in row i (i.e. the i'th character in the
	 * BWT transform).  Note that the 'L' in the name of the function
	 * stands for 'last', as in the literature.
	 */
	inline int rowL(const SideLocus& l) const {
		// Extract and return appropriate bit-pair
		return unpack_2b_from_8b(l.side(this->ebwt())[l._by], l._bp);
	}

	/**
	 * Return the final character in row i (i.e. the i'th character in the
	 * BWT transform).  Note that the 'L' in the name of the function
	 * stands for 'last', as in the literature.
	 */
	inline int rowL(TIndexOffU i) const {
		// Extract and return appropriate bit-pair
		SideLocus l;
		l.initFromRow(i, _eh, ebwt());
		return rowL(l);
	}

	/**
	 * Given top and bot loci, calculate counts of all four DNA chars up to
	 * those loci.  Used for more advanced backtracking-search.
	 */
	inline void mapLFRange(
		SideLocus& ltop,
		SideLocus& lbot,
		TIndexOffU num,        // Number of elts
		TIndexOffU* cntsUpto,  // A/C/G/T counts up to top
		TIndexOffU* cntsIn,    // A/C/G/T counts within range
		EList<bool> *masks
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		assert(ltop.repOk(this->eh()));
		assert(lbot.repOk(this->eh()));
		assert_eq(num, lbot.toBWRow() - ltop.toBWRow());
		assert_eq(0, cntsUpto[0]); assert_eq(0, cntsIn[0]);
		assert_eq(0, cntsUpto[1]); assert_eq(0, cntsIn[1]);
		assert_eq(0, cntsUpto[2]); assert_eq(0, cntsIn[2]);
		assert_eq(0, cntsUpto[3]); assert_eq(0, cntsIn[3]);
		countBt2SideRange(ltop, num, cntsUpto, cntsIn, masks);
		assert_eq(num, cntsIn[0] + cntsIn[1] + cntsIn[2] + cntsIn[3]);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Make sure results match up with individual calls to mapLF;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			TIndexOffU tops[4] = {0, 0, 0, 0};
			TIndexOffU bots[4] = {0, 0, 0, 0};
			assert(ltop.repOk(this->eh()));
			assert(lbot.repOk(this->eh()));
			mapLFEx(ltop, lbot, tops, bots, false);
			for(int i = 0; i < 4; i++) {
				assert(cntsUpto[i] == tops[i] || tops[i] == bots[i]);
				if(i == 0) {
					assert(cntsIn[i] == bots[i]-tops[i] ||
						   cntsIn[i] == bots[i]-tops[i]+1);
				} else {
					assert_eq(cntsIn[i], bots[i]-tops[i]);
				}
			}
		}
#endif
	}

	/**
	 * Given row i, return the row that the LF mapping maps i to.
	 */
	inline TIndexOffU mapLF(
		const SideLocus& l
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		ASSERT_ONLY(TIndexOffU srcrow = l.toBWRow());
		TIndexOffU ret;
		assert(l.side(this->ebwt()) != NULL);
		int c = rowL(l);
		assert_lt(c, 4);
		assert_geq(c, 0);
		ret = countBt2Side(l, c);
		assert_lt(ret, this->_eh._bwtLen);
		assert_neq(srcrow, ret);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Make sure results match up with results from mapLFEx;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			TIndexOffU arrs[] = { 0, 0, 0, 0 };
			mapLFEx(l, arrs, true);
			assert_eq(arrs[c], ret);
		}
#endif
		return ret;
	}

	/**
	 * Given row i and character c, return the row that the LF mapping maps
	 * i to on character c.
	 */
	inline TIndexOffU mapLF(
		const SideLocus& l, int c
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		TIndexOffU ret;
		assert_lt(c, 4);
		assert_geq(c, 0);
		ret = countBt2Side(l, c);
		assert_lt(ret, this->_eh._bwtLen);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Make sure results match up with results from mapLFEx;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			TIndexOffU arrs[] = { 0, 0, 0, 0 };
			mapLFEx(l, arrs, true);
			assert_eq(arrs[c], ret);
		}
#endif
		return ret;
	}

	/**
	 * Given top and bot loci, calculate counts of all four DNA chars up to
	 * those loci.  Also, update a set of tops and bots for the reverse
	 * index/direction using the idea from the bi-directional BWT paper.
	 */
	inline void mapBiLFEx(
		const SideLocus& ltop,
		const SideLocus& lbot,
		TIndexOffU *tops,
		TIndexOffU *bots,
		TIndexOffU *topsP, // topsP[0] = top
		TIndexOffU *botsP
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
#ifndef NDEBUG
		for(int i = 0; i < 4; i++) {
			assert_eq(0, tops[0]);  assert_eq(0, bots[0]);
		}
#endif
		countBt2SideEx(ltop, tops);
		countBt2SideEx(lbot, bots);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Make sure results match up with individual calls to mapLF;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			assert_eq(mapLF(ltop, 0, true), tops[0]);
			assert_eq(mapLF(ltop, 1, true), tops[1]);
			assert_eq(mapLF(ltop, 2, true), tops[2]);
			assert_eq(mapLF(ltop, 3, true), tops[3]);
			assert_eq(mapLF(lbot, 0, true), bots[0]);
			assert_eq(mapLF(lbot, 1, true), bots[1]);
			assert_eq(mapLF(lbot, 2, true), bots[2]);
			assert_eq(mapLF(lbot, 3, true), bots[3]);
		}
#endif
		// bots[0..3] - tops[0..3] = # of ways to extend the suffix with an
		// A, C, G, T
		botsP[0] = topsP[0] + (bots[0] - tops[0]);
		topsP[1] = botsP[0];
		botsP[1] = topsP[1] + (bots[1] - tops[1]);
		topsP[2] = botsP[1];
		botsP[2] = topsP[2] + (bots[2] - tops[2]);
		topsP[3] = botsP[2];
		botsP[3] = topsP[3] + (bots[3] - tops[3]);
	}

	/**
	 * Given row and its locus information, proceed on the given character
	 * and return the next row, or all-fs if we can't proceed on that
	 * character.  Returns max value if this row ends in $.
	 */
	inline TIndexOffU mapLF1(
		TIndexOffU row,       // starting row
		const SideLocus& l, // locus for starting row
		int c               // character to proceed on
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		if(rowL(l) != c || row == _zOff) return OFF_MASK;
		TIndexOffU ret;
		assert_lt(c, 4);
		assert_geq(c, 0);
		ret = countBt2Side(l, c);
		assert_lt(ret, this->_eh._bwtLen);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Make sure results match up with results from mapLFEx;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			TIndexOffU arrs[] = { 0, 0, 0, 0 };
			mapLFEx(l, arrs, true);
			assert_eq(arrs[c], ret);
		}
#endif
		return ret;
	}


	/**
	 * Given row and its locus information, set the row to LF(row) and
	 * return the character that was in the final column.
	 */
	inline int mapLF1(
		TIndexOffU& row,      // starting row
		const SideLocus& l  // locus for starting row
		ASSERT_ONLY(, bool overrideSanity = false)
		) const
	{
		if(row == _zOff) return -1;
		int c = rowL(l);
		assert_range(0, 3, c);
		row = countBt2Side(l, c);
		assert_lt(row, this->_eh._bwtLen);
#ifndef NDEBUG
		if(_sanity && !overrideSanity) {
			// Make sure results match up with results from mapLFEx;
			// be sure to override sanity-checking in the callee, or we'll
			// have infinite recursion
			TIndexOffU arrs[] = { 0, 0, 0, 0 };
			mapLFEx(l, arrs, true);
			assert_eq(arrs[c], row);
		}
#endif
		return c;
	}

#ifndef NDEBUG
	/// Check that in-memory Ebwt is internally consistent with respect
	/// to given EbwtParams; assert if not
	bool inMemoryRepOk(const EbwtParams& eh) const {
		assert_geq(_zEbwtBpOff, 0);
		assert_lt(_zEbwtBpOff, 4);
		assert_lt(_zEbwtByteOff, eh._ebwtTotSz);
		assert_lt(_zOff, eh._bwtLen);
		assert_geq(_nFrag, _nPat);
		return true;
	}

	/// Check that in-memory Ebwt is internally consistent; assert if
	/// not
	bool inMemoryRepOk() const {
		return repOk(_eh);
	}

	/// Check that Ebwt is internally consistent with respect to given
	/// EbwtParams; assert if not
	bool repOk(const EbwtParams& eh) const {
		assert(_eh.repOk());
		if(isInMemory()) {
			return inMemoryRepOk(eh);
		}
		return true;
	}

	/// Check that Ebwt is internally consistent; assert if not
	bool repOk() const {
		return repOk(_eh);
	}
#endif

	bool       _toBigEndian;
	int32_t    _overrideOffRate;
	bool       _verbose;
	bool       _passMemExc;
	bool       _sanity;
	bool       fw_;     // true iff this is a forward index
	FILE       *_in1;    // input fd for primary index file
	FILE       *_in2;    // input fd for secondary index file
	string     _in1Str; // filename for primary index file
	string     _in2Str; // filename for secondary index file
	string     _inSaStr;  // filename for suffix-array file
	string     _inBwtStr; // filename for BWT file
	TIndexOffU  _zOff;
	TIndexOffU  _zEbwtByteOff;
	TIndexOff   _zEbwtBpOff; 
	TIndexOffU  _nPat;  /// number of reference texts
	TIndexOffU  _nFrag; /// number of fragments
	APtrWrap<TIndexOffU> _plen;
	APtrWrap<TIndexOffU> _rstarts; // starting offset of fragments / text indexes
	// _fchr, _ftab and _eftab are expected to be relatively small
	// (usually < 1MB, perhaps a few MB if _fchr is particularly large
	// - like, say, 11).  For this reason, we don't bother with writing
	// them to disk through separate output streams; we
	APtrWrap<TIndexOffU> _fchr;
	APtrWrap<TIndexOffU> _ftab;
	APtrWrap<TIndexOffU> _eftab; // "extended" entries for _ftab
	// _offs may be extremely large.  E.g. for DNA w/ offRate=4 (one
	// offset every 16 rows), the total size of _offs is the same as
	// the total size of the input sequence
	APtrWrap<TIndexOffU> _offs;
	// _ebwt is the Extended Burrows-Wheeler Transform itself, and thus
	// is at least as large as the input sequence.
	APtrWrap<uint8_t> _ebwt;
	bool       _useMm;        /// use memory-mapped files to hold the index
	bool       useShmem_;     /// use shared memory to hold large parts of the index
	EList<string> _refnames; /// names of the reference sequences
	char *mmFile1_;
	char *mmFile2_;
	EbwtParams _eh;
	bool packed_;

	static const TIndexOffU default_bmax = OFF_MASK;
	static const TIndexOffU default_bmaxMultSqrt = OFF_MASK;
	static const TIndexOffU default_bmaxDivN = 4;
	static const int      default_dcv = 1024;
	static const bool     default_noDc = false;
	static const bool     default_useBlockwise = true;
	static const uint32_t default_seed = 0;
#ifdef BOWTIE_64BIT_INDEX
	static const int      default_lineRate = 7;
#else
	static const int      default_lineRate = 6;
#endif
	static const int      default_offRate = 5;
	static const int      default_offRatePlus = 0;
	static const int      default_ftabChars = 10;
	static const bool     default_bigEndian = false;

private:

	ostream& log() const {
		return cout; // TODO: turn this into a parameter
	}

	/// Print a verbose message and flush (flushing is helpful for
	/// debugging)
	void verbose(const string& s) const {
		if(this->verbose()) {
			this->log() << s.c_str();
			this->log().flush();
		}
	}
};

/**
 * Read reference names from an input stream 'in' for an Ebwt primary
 * file and store them in 'refnames'.
 */
void readEbwtRefnames(FILE* fin, EList<string>& refnames);

/**
 * Read reference names from the index with basename 'in' and store
 * them in 'refnames'.
 */
void readEbwtRefnames(const string& instr, EList<string>& refnames);

/**
 * Read just enough of the Ebwt's header to determine whether it's
 * colorspace.
 */
bool readEbwtColor(const string& instr);

/**
 * Read just enough of the Ebwt's header to determine whether it's
 * entirely reversed.
 */
bool readEntireReverse(const string& instr);

///////////////////////////////////////////////////////////////////////
//
// Functions for building Ebwts
//
///////////////////////////////////////////////////////////////////////

/**
 * Join several text strings together in a way that's compatible with
 * the text-chunking scheme dictated by chunkRate parameter.
 *
 * The non-static member Ebwt::join additionally builds auxilliary
 * arrays that maintain a mapping between chunks in the joined string
 * and the original text strings.
 */
template<typename TStr>
TStr Ebwt::join(EList<TStr>& l, uint32_t seed) {
	RandomSource rand; // reproducible given same seed
	rand.init(seed);
	TStr ret;
	TIndexOffU guessLen = 0;
	for(TIndexOffU i = 0; i < l.size(); i++) {
		guessLen += length(l[i]);
	}
	ret.resize(guessLen);
	TIndexOffU off = 0;
	for(size_t i = 0; i < l.size(); i++) {
		TStr& s = l[i];
		assert_gt(s.length(), 0);
		for(size_t j = 0; j < s.size(); j++) {
			ret.set(s[j], off++);
		}
	}
	return ret;
}

/**
 * Join several text strings together in a way that's compatible with
 * the text-chunking scheme dictated by chunkRate parameter.
 *
 * The non-static member Ebwt::join additionally builds auxilliary
 * arrays that maintain a mapping between chunks in the joined string
 * and the original text strings.
 */
template<typename TStr>
TStr Ebwt::join(EList<FileBuf*>& l,
                EList<RefRecord>& szs,
                TIndexOffU sztot,
                const RefReadInParams& refparams,
                uint32_t seed)
{
	RandomSource rand; // reproducible given same seed
	rand.init(seed);
	RefReadInParams rpcp = refparams;
	TStr ret;
	TIndexOffU guessLen = sztot;
	ret.resize(guessLen);
	ASSERT_ONLY(TIndexOffU szsi = 0);
	TIndexOffU dstoff = 0;
	for(TIndexOffU i = 0; i < l.size(); i++) {
		// For each sequence we can pull out of istream l[i]...
		assert(!l[i]->eof());
		bool first = true;
		while(!l[i]->eof()) {
			RefRecord rec = fastaRefReadAppend(*l[i], first, ret, dstoff, rpcp);
			first = false;
			if(rec.first && rec.len == 0) {
				continue;
			}
			TIndexOffU bases = rec.len;
			assert_eq(rec.off, szs[szsi].off);
			assert_eq(rec.len, szs[szsi].len);
			assert_eq(rec.first, szs[szsi].first);
			ASSERT_ONLY(szsi++);
			if(bases == 0) continue;
		}
	}
	return ret;
}

/**
 * Join several text strings together according to the text-chunking
 * scheme specified in the EbwtParams.  Ebwt fields calculated in this
 * function are written directly to disk.
 *
 * It is assumed, but not required, that the header values have already
 * been written to 'out1' before this function is called.
 *
 * The static member Ebwt::join just returns a joined version of a
 * list of strings without building any of the auxilliary arrays.
 */
template<typename TStr>
void Ebwt::joinToDisk(
	EList<FileBuf*>& l,
	EList<RefRecord>& szs,
	TIndexOffU sztot,
	const RefReadInParams& refparams,
	TStr& ret,
	ostream& out1,
	ostream& out2)
{
	RefReadInParams rpcp = refparams;
	assert_gt(szs.size(), 0);
	assert_gt(l.size(), 0);
	assert_gt(sztot, 0);
	// Not every fragment represents a distinct sequence - many
	// fragments may correspond to a single sequence.  Count the
	// number of sequences here by counting the number of "first"
	// fragments.
	this->_nPat = 0;
	this->_nFrag = 0;
	for(TIndexOffU i = 0; i < szs.size(); i++) {
		if(szs[i].len > 0) this->_nFrag++;
		if(szs[i].first && szs[i].len > 0) this->_nPat++;
	}
	assert_gt(this->_nPat, 0);
	assert_geq(this->_nFrag, this->_nPat);
	_rstarts.reset();
	writeU<TIndexOffU>(out1, this->_nPat, this->toBe());
	// Allocate plen[]
	try {
		this->_plen.init(new TIndexOffU[this->_nPat], this->_nPat);
	} catch(bad_alloc& e) {
		cerr << "Out of memory allocating plen[] in Ebwt::join()"
		     << " at " << __FILE__ << ":" << __LINE__ << endl;
		throw e;
	}
	// For each pattern, set plen
	TIndexOff npat = -1;
	for(TIndexOffU i = 0; i < szs.size(); i++) {
		if(szs[i].first && szs[i].len > 0) {
			if(npat >= 0) {
				writeU<TIndexOffU>(out1, this->plen()[npat], this->toBe());
			}
			this->plen()[++npat] = (szs[i].len + szs[i].off);
		} else if(!szs[i].first) {
			// edge case, but we could get here with npat == -1
			// e.g. when building from a reference of all Ns
			if (npat < 0) npat = 0;
			this->plen()[npat] += (szs[i].len + szs[i].off);
		}
	}
	assert_eq((TIndexOffU)npat, this->_nPat-1);
	writeU<TIndexOffU>(out1, this->plen()[npat], this->toBe());
	// Write the number of fragments
	writeU<TIndexOffU>(out1, this->_nFrag, this->toBe());
	TIndexOffU seqsRead = 0;
	ASSERT_ONLY(TIndexOffU szsi = 0);
	ASSERT_ONLY(TIndexOffU entsWritten = 0);
	TIndexOffU dstoff = 0;
	// For each filebuf
	for(unsigned int i = 0; i < l.size(); i++) {
		assert(!l[i]->eof());
		bool first = true;
		TIndexOffU patoff = 0;
		// For each *fragment* (not necessary an entire sequence) we
		// can pull out of istream l[i]...
		while(!l[i]->eof()) {
			string name;
			// Push a new name onto our vector
			_refnames.push_back("");
			RefRecord rec = fastaRefReadAppend(
				*l[i], first, ret, dstoff, rpcp, &_refnames.back());
			first = false;
			TIndexOffU bases = rec.len;
			if(rec.first && rec.len > 0) {
				if(_refnames.back().length() == 0) {
					// If name was empty, replace with an index
					ostringstream stm;
					stm << seqsRead;
					_refnames.back() = stm.str();
				}
			} else {
				// This record didn't actually start a new sequence so
				// no need to add a name
				//assert_eq(0, _refnames.back().length());
				_refnames.pop_back();
			}
			if(rec.first && rec.len == 0) {
				continue;
			}
			assert_lt(szsi, szs.size());
			assert_eq(rec.off, szs[szsi].off);
			assert_eq(rec.len, szs[szsi].len);
			assert_eq(rec.first, szs[szsi].first);
			assert(rec.first || rec.off > 0);
			ASSERT_ONLY(szsi++);
			// Increment seqsRead if this is the first fragment
			if(rec.first) seqsRead++;
			if(bases == 0) continue;
			assert_leq(bases, this->plen()[seqsRead-1]);
			// Reset the patoff if this is the first fragment
			if(rec.first) patoff = 0;
			patoff += rec.off; // add fragment's offset from end of last frag.
			// Adjust rpcps
			//uint32_t seq = seqsRead-1;
			ASSERT_ONLY(entsWritten++);
			// This is where rstarts elements are written to the output stream
			//writeU32(out1, oldRetLen, this->toBe()); // offset from beginning of joined string
			//writeU32(out1, seq,       this->toBe()); // sequence id
			//writeU32(out1, patoff,    this->toBe()); // offset into sequence
			patoff += bases;
		}
		assert_gt(szsi, 0);
		l[i]->reset();
		assert(!l[i]->eof());
#ifndef NDEBUG
		int c = l[i]->get();
		assert_eq('>', c);
		assert(!l[i]->eof());
		l[i]->reset();
		assert(!l[i]->eof());
#endif
	}
	assert_eq(entsWritten, this->_nFrag);
}

/**
 * Build an Ebwt from a string 's' and its suffix array 'sa' (which
 * might actually be a suffix array *builder* that builds blocks of the
 * array on demand).  The bulk of the Ebwt, i.e. the ebwt and offs
 * arrays, is written directly to disk.  This is by design: keeping
 * those arrays in memory needlessly increases the footprint of the
 * building process.  Instead, we prefer to build the Ebwt directly
 * "to disk" and then read it back into memory later as necessary.
 *
 * It is assumed that the header values and join-related values (nPat,
 * plen) have already been written to 'out1' before this function
 * is called.  When this function is finished, it will have
 * additionally written ebwt, zOff, fchr, ftab and eftab to the primary
 * file and offs to the secondary file.
 *
 * Assume DNA/RNA/any alphabet with 4 or fewer elements.
 * Assume occ array entries are 32 bits each.
 *
 * @param sa            the suffix array to convert to a Ebwt
 * @param s             the original string
 * @param out
 */
template<typename TStr>
void Ebwt::buildToDisk(
	InorderBlockwiseSA<TStr>& sa,
	const TStr& s,
	ostream& out1,
	ostream& out2,
	ostream* saOut,
	ostream* bwtOut)
{
	const EbwtParams& eh = this->_eh;

	assert(eh.repOk());
	assert_eq(s.length()+1, sa.size());
	assert_eq(s.length(), eh._len);
	assert_gt(eh._lineRate, 3);
	assert(sa.suffixItrIsReset());

	TIndexOffU len = eh._len;
	TIndexOffU ftabLen = eh._ftabLen;
	TIndexOffU sideSz = eh._sideSz;
	TIndexOffU ebwtTotSz = eh._ebwtTotSz;
	TIndexOffU fchr[] = {0, 0, 0, 0, 0};
	EList<TIndexOffU> ftab(EBWT_CAT);
	TIndexOffU zOff = OFF_MASK;

	// Save # of occurrences of each character as we walk along the bwt
	TIndexOffU occ[4] = {0, 0, 0, 0};
	TIndexOffU occSave[4] = {0, 0, 0, 0};

	// Record rows that should "absorb" adjacent rows in the ftab.
	// The absorbed rows represent suffixes shorter than the ftabChars
	// cutoff.
	uint8_t absorbCnt = 0;
	EList<uint8_t> absorbFtab(EBWT_CAT);
	try {
		VMSG_NL("Allocating ftab, absorbFtab");
		ftab.resize(ftabLen);
		ftab.fillZero();
		absorbFtab.resize(ftabLen);
		absorbFtab.fillZero();
	} catch(bad_alloc &e) {
		cerr << "Out of memory allocating ftab[] or absorbFtab[] "
		     << "in Ebwt::buildToDisk() at " << __FILE__ << ":"
		     << __LINE__ << endl;
		throw e;
	}

	// Allocate the side buffer; holds a single side as its being
	// constructed and then written to disk.  Reused across all sides.
#ifdef SIXTY4_FORMAT
	EList<uint64_t> ebwtSide(EBWT_CAT);
#else
	EList<uint8_t> ebwtSide(EBWT_CAT);
#endif
	try {
#ifdef SIXTY4_FORMAT
		ebwtSide.resize(sideSz >> 3);
#else
		ebwtSide.resize(sideSz);
#endif
	} catch(bad_alloc &e) {
		cerr << "Out of memory allocating ebwtSide[] in "
		     << "Ebwt::buildToDisk() at " << __FILE__ << ":"
		     << __LINE__ << endl;
		throw e;
	}

	// Points to the base offset within ebwt for the side currently
	// being written
	TIndexOffU side = 0;

	// Whether we're assembling a forward or a reverse bucket
	bool fw;
	TIndexOff sideCur = 0;
	fw = true;

	// Have we skipped the '$' in the last column yet?
	ASSERT_ONLY(bool dollarSkipped = false);

	TIndexOffU si = 0;   // string offset (chars)
	ASSERT_ONLY(TIndexOffU lastSufInt = 0);
	ASSERT_ONLY(bool inSA = true); // true iff saI still points inside suffix
	                               // array (as opposed to the padding at the
	                               // end)
	// Iterate over packed bwt bytes
	VMSG_NL("Entering Ebwt loop");
	ASSERT_ONLY(TIndexOffU beforeEbwtOff = (TIndexOffU)out1.tellp()); // @double-check - pos_type, std::streampos 
	
	// First integer in the suffix-array output file is the length of the
	// array, including $
	if(saOut != NULL) {
		// Write length word
		writeU<TIndexOffU>(*saOut, len+1, this->toBe());
	}
	
	// First integer in the BWT output file is the length of BWT(T), including $
	if(bwtOut != NULL) {
		// Write length word
		writeU<TIndexOffU>(*bwtOut, len+1, this->toBe());
	}
	
	while(side < ebwtTotSz) {
		// Sanity-check our cursor into the side buffer
		assert_geq(sideCur, 0);
		assert_lt(sideCur, (int)eh._sideBwtSz);
		assert_eq(0, side % sideSz); // 'side' must be on side boundary
		ebwtSide[sideCur] = 0; // clear
		assert_lt(side + sideCur, ebwtTotSz);
		// Iterate over bit-pairs in the si'th character of the BWT
#ifdef SIXTY4_FORMAT
		for(int bpi = 0; bpi < 32; bpi++, si++)
#else
		for(int bpi = 0; bpi < 4; bpi++, si++)
#endif
		{
			int bwtChar;
			bool count = true;
			if(si <= len) {
				// Still in the SA; extract the bwtChar
				TIndexOffU saElt = sa.nextSuffix();
				// Write it to the optional suffix-array output file
				if(saOut != NULL) {
					writeU<TIndexOffU>(*saOut, saElt, this->toBe());
				}
				// TODO: what exactly to write to the BWT output file?  How to
				// represent $?  How to pack nucleotides into bytes/words?
				
				// (that might have triggered sa to calc next suf block)
				if(saElt == 0) {
					// Don't add the '$' in the last column to the BWT
					// transform; we can't encode a $ (only A C T or G)
					// and counting it as, say, an A, will mess up the
					// LR mapping
					bwtChar = 0; count = false;
					ASSERT_ONLY(dollarSkipped = true);
					zOff = si; // remember the SA row that
					           // corresponds to the 0th suffix
				} else {
					bwtChar = (int)(s[saElt-1]);
					assert_lt(bwtChar, 4);
					// Update the fchr
					fchr[bwtChar]++;
				}
				// Update ftab
				if((len-saElt) >= (TIndexOffU)eh._ftabChars) {
					// Turn the first ftabChars characters of the
					// suffix into an integer index into ftab.  The
					// leftmost (lowest index) character of the suffix
					// goes in the most significant bit pair if the
					// integer.
					TIndexOffU sufInt = 0;
					for(int i = 0; i < eh._ftabChars; i++) {
						sufInt <<= 2;
						assert_lt((TIndexOffU)i, len-saElt);
						sufInt |= (unsigned char)(s[saElt+i]);
					}
					// Assert that this prefix-of-suffix is greater
					// than or equal to the last one (true b/c the
					// suffix array is sorted)
					#ifndef NDEBUG
					if(lastSufInt > 0) assert_geq(sufInt, lastSufInt);
					lastSufInt = sufInt;
					#endif
					// Update ftab
					assert_lt(sufInt+1, ftabLen);
					ftab[sufInt+1]++;
					if(absorbCnt > 0) {
						// Absorb all short suffixes since the last
						// transition into this transition
						absorbFtab[sufInt] = absorbCnt;
						absorbCnt = 0;
					}
				} else {
					// Otherwise if suffix is fewer than ftabChars
					// characters long, then add it to the 'absorbCnt';
					// it will be absorbed into the next transition
					assert_lt(absorbCnt, 255);
					absorbCnt++;
				}
				// Suffix array offset boundary? - update offset array
				if((si & eh._offMask) == si) {
					assert_lt((si >> eh._offRate), eh._offsLen);
					// Write offsets directly to the secondary output
					// stream, thereby avoiding keeping them in memory
					writeU<TIndexOffU>(out2, saElt, this->toBe());
				}
			} else {
				// Strayed off the end of the SA, now we're just
				// padding out a bucket
				#ifndef NDEBUG
				if(inSA) {
					// Assert that we wrote all the characters in the
					// string before now
					assert_eq(si, len+1);
					inSA = false;
				}
				#endif
				// 'A' used for padding; important that padding be
				// counted in the occ[] array
				bwtChar = 0;
			}
			if(count) occ[bwtChar]++;
			// Append BWT char to bwt section of current side
			if(fw) {
				// Forward bucket: fill from least to most
#ifdef SIXTY4_FORMAT
				ebwtSide[sideCur] |= ((uint64_t)bwtChar << (bpi << 1));
				if(bwtChar > 0) assert_gt(ebwtSide[sideCur], 0);
#else
				pack_2b_in_8b(bwtChar, ebwtSide[sideCur], bpi);
				assert_eq((ebwtSide[sideCur] >> (bpi*2)) & 3, bwtChar);
#endif
			} else {
				// Backward bucket: fill from most to least
#ifdef SIXTY4_FORMAT
				ebwtSide[sideCur] |= ((uint64_t)bwtChar << ((31 - bpi) << 1));
				if(bwtChar > 0) assert_gt(ebwtSide[sideCur], 0);
#else
				pack_2b_in_8b(bwtChar, ebwtSide[sideCur], 3-bpi);
				assert_eq((ebwtSide[sideCur] >> ((3-bpi)*2)) & 3, bwtChar);
#endif
			}
		} // end loop over bit-pairs
		assert_eq(dollarSkipped ? 3 : 0, (occ[0] + occ[1] + occ[2] + occ[3]) & 3);
#ifdef SIXTY4_FORMAT
		assert_eq(0, si & 31);
#else
		assert_eq(0, si & 3);
#endif

		sideCur++;
		if(sideCur == (int)eh._sideBwtSz) {
			sideCur = 0;
			TIndexOffU *cpptr = reinterpret_cast<TIndexOffU*>(ebwtSide.ptr());
			// Write 'A', 'C', 'G' and 'T' tallies
			side += sideSz;
			assert_leq(side, eh._ebwtTotSz);
#ifdef BOWTIE_64BIT_INDEX
			cpptr[(sideSz >> 3)-4] = endianizeU<TIndexOffU>(occSave[0], this->toBe());
			cpptr[(sideSz >> 3)-3] = endianizeU<TIndexOffU>(occSave[1], this->toBe());
			cpptr[(sideSz >> 3)-2] = endianizeU<TIndexOffU>(occSave[2], this->toBe());
			cpptr[(sideSz >> 3)-1] = endianizeU<TIndexOffU>(occSave[3], this->toBe());
#else
			cpptr[(sideSz >> 2)-4] = endianizeU<TIndexOffU>(occSave[0], this->toBe());
			cpptr[(sideSz >> 2)-3] = endianizeU<TIndexOffU>(occSave[1], this->toBe());
			cpptr[(sideSz >> 2)-2] = endianizeU<TIndexOffU>(occSave[2], this->toBe());
			cpptr[(sideSz >> 2)-1] = endianizeU<TIndexOffU>(occSave[3], this->toBe());
#endif
			occSave[0] = occ[0];
			occSave[1] = occ[1];
			occSave[2] = occ[2];
			occSave[3] = occ[3];
			// Write backward side to primary file
			out1.write((const char *)ebwtSide.ptr(), sideSz);
		}
	}
	VMSG_NL("Exited Ebwt loop");
	assert_neq(zOff, OFF_MASK);
	if(absorbCnt > 0) {
		// Absorb any trailing, as-yet-unabsorbed short suffixes into
		// the last element of ftab
		absorbFtab[ftabLen-1] = absorbCnt;
	}
	// Assert that our loop counter got incremented right to the end
	assert_eq(side, eh._ebwtTotSz);
	// Assert that we wrote the expected amount to out1
	assert_eq(((TIndexOffU)out1.tellp() - beforeEbwtOff), eh._ebwtTotSz); // @double-check - pos_type
	// assert that the last thing we did was write a forward bucket

	//
	// Write zOff to primary stream
	//
	writeU<TIndexOffU>(out1, zOff, this->toBe());

	//
	// Finish building fchr
	//
	// Exclusive prefix sum on fchr
	for(int i = 1; i < 4; i++) {
		fchr[i] += fchr[i-1];
	}
	assert_eq(fchr[3], len);
	// Shift everybody up by one
	for(int i = 4; i >= 1; i--) {
		fchr[i] = fchr[i-1];
	}
	fchr[0] = 0;
	if(_verbose) {
		for(int i = 0; i < 5; i++)
			cout << "fchr[" << "ACGT$"[i] << "]: " << fchr[i] << endl;
	}
	// Write fchr to primary file
	for(int i = 0; i < 5; i++) {
		writeU<TIndexOffU>(out1, fchr[i], this->toBe());
	}

	//
	// Finish building ftab and build eftab
	//
	// Prefix sum on ftable
	TIndexOffU eftabLen = 0;
	assert_eq(0, absorbFtab[0]);
	for(TIndexOffU i = 1; i < ftabLen; i++) {
		if(absorbFtab[i] > 0) eftabLen += 2;
	}
	assert_leq(eftabLen, (TIndexOffU)eh._ftabChars*2);
	eftabLen = eh._ftabChars*2;
	EList<TIndexOffU> eftab(EBWT_CAT);
	try {
		eftab.resize(eftabLen);
		eftab.fillZero();
	} catch(bad_alloc &e) {
		cerr << "Out of memory allocating eftab[] "
		     << "in Ebwt::buildToDisk() at " << __FILE__ << ":"
		     << __LINE__ << endl;
		throw e;
	}
	TIndexOffU eftabCur = 0;
	for(TIndexOffU i = 1; i < ftabLen; i++) {
		TIndexOffU lo = ftab[i] + Ebwt::ftabHi(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, i-1);
		if(absorbFtab[i] > 0) {
			// Skip a number of short pattern indicated by absorbFtab[i]
			TIndexOffU hi = lo + absorbFtab[i];
			assert_lt(eftabCur*2+1, eftabLen);
			eftab[eftabCur*2] = lo;
			eftab[eftabCur*2+1] = hi;
			ftab[i] = (eftabCur++) ^ OFF_MASK; // insert pointer into eftab
			assert_eq(lo, Ebwt::ftabLo(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, i));
			assert_eq(hi, Ebwt::ftabHi(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, i));
		} else {
			ftab[i] = lo;
		}
	}
	assert_eq(Ebwt::ftabHi(ftab.ptr(), eftab.ptr(), len, ftabLen, eftabLen, ftabLen-1), len+1);
	// Write ftab to primary file
	for(TIndexOffU i = 0; i < ftabLen; i++) {
		writeU<TIndexOffU>(out1, ftab[i], this->toBe());
	}
	// Write eftab to primary file
	for(TIndexOffU i = 0; i < eftabLen; i++) {
		writeU<TIndexOffU>(out1, eftab[i], this->toBe());
	}

	// Note: if you'd like to sanity-check the Ebwt, you'll have to
	// read it back into memory first!
	assert(!isInMemory());
	VMSG_NL("Exiting Ebwt::buildToDisk()");
}

/**
 * Try to find the Bowtie index specified by the user.  First try the
 * exact path given by the user.  Then try the user-provided string
 * appended onto the path of the "indexes" subdirectory below this
 * executable, then try the provided string appended onto
 * "$BOWTIE2_INDEXES/".
 */
string adjustEbwtBase(const string& cmdline,
					  const string& ebwtFileBase,
					  bool verbose);


extern string gLastIOErrMsg;

/* Checks whether a call to read() failed or not. */
inline bool is_read_err(int fdesc, ssize_t ret, size_t count){
	if (ret < 0) {
		std::stringstream sstm;
		sstm << "ERRNO: " << errno << " ERR Msg:" << strerror(errno) << std::endl;
		gLastIOErrMsg = sstm.str();
		return true;
	}
	return false;
}

/* Checks whether a call to fread() failed or not. */
inline bool is_fread_err(FILE* file_hd, size_t ret, size_t count){
	if (ferror(file_hd)) {
		gLastIOErrMsg = "Error Reading File!";
		return true;
	}
	return false;
}


#endif /*EBWT_H_*/