1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
|
/*++
Module Name:
AlignerOptions.h
Abstract:
Common parameters for running single & paired alignment.
Authors:
Ravi Pandya, May, 2012
Environment:
User mode service.
Revision History:
Integrated from SingleAligner.cpp & PairedAligner.cpp
--*/
#pragma once
#include "stdafx.h"
#include "options.h"
#include "Genome.h"
#include "Read.h"
#if INSTRUMENTATION_FOR_PAPER
//
// Instrumentation counters.  These declarations are only visible when INSTRUMENTATION_FOR_PAPER
// is non-zero.  Every table below has MAX_HIT_SIZE_LOG_2 + 1 entries in each dimension.
//
#define MAX_HIT_SIZE_LOG_2 15 // This is for the instrumentation

//
// In the paired-end aligner, if you have seeds A and B with hit set sizes |A| and |B| then the
// total time in ns gets added into g_alignmentTimeByHitCountsOfEachSeed[log2(|A|)][log2(|B|)].
//
extern _int64 g_alignmentTimeByHitCountsOfEachSeed[MAX_HIT_SIZE_LOG_2 + 1][MAX_HIT_SIZE_LOG_2 + 1];

// Same as the time table above, but just add one per time.
extern _int64 g_alignmentCountByHitCountsOfEachSeed[MAX_HIT_SIZE_LOG_2 + 1][MAX_HIT_SIZE_LOG_2 + 1];

// NOTE(review): the three tables below are presumably indexed like the two above
// ([log2(|A|)][log2(|B|)]) -- confirm against the code that fills them in.
extern _int64 g_scoreCountByHitCountsOfEachSeed[MAX_HIT_SIZE_LOG_2 + 1][MAX_HIT_SIZE_LOG_2 + 1];
extern _int64 g_setIntersectionSizeByHitCountsOfEachSeed[MAX_HIT_SIZE_LOG_2 + 1][MAX_HIT_SIZE_LOG_2 + 1];
extern _int64 g_100xtotalRatioOfSetIntersectionSizeToSmallerSeedHitCountByCountsOfEachSeed[MAX_HIT_SIZE_LOG_2 + 1][MAX_HIT_SIZE_LOG_2 + 1];

// Scalar totals (not bucketed by hit count).
extern _int64 g_totalSizeOfSmallerHitSet;
extern _int64 g_totalSizeOfSetIntersection;
extern _int64 g_alignmentsWithMoreThanOneCandidateWhereTheBestCandidateIsScoredFirst;
extern _int64 g_alignmentsWithMoreThanOneCandidate;
#endif // INSTRUMENTATION_FOR_PAPER
// NOTE(review): the name suggests a MAPQ bound applied when a read has a single hit; the use
// sites are not in this header -- confirm there before changing the value.
#define MAPQ_LIMIT_FOR_SINGLE_HIT 10
//
// Interface for a group of command-line options.  Implementers supply their own usage text and
// their own argument parsing.
//
struct AbstractOptions
{
    // Virtual so that destroying an options object through an AbstractOptions pointer runs the
    // derived class's destructor instead of being undefined behavior.
    virtual ~AbstractOptions() {}

    // Print the usage text for the options this object handles.
    virtual void usageMessage() = 0;

    //
    // Parse command-line arguments.
    //   argv, argc - the argument vector and its length.
    //   n          - passed by reference; presumably the index of the argument being examined,
    //                advanced when extra arguments are consumed (confirm in implementers).
    //   done       - out parameter; presumably set when parsing should stop (confirm in implementers).
    // The meaning of the bool result is defined by the implementers, which are not in this header.
    //
    virtual bool parse(const char** argv, int argc, int& n, bool *done) = 0;
};
// The kinds of read file distinguished by SNAPFile.  Add more as needed.
enum FileType {
    UnknownFileType,        // Value 0; what a default-constructed SNAPFile starts out as
    SAMFile,
    FASTQFile,
    BAMFile,
    InterleavedFASTQFile,
    CRAMFile
};
//
// Describes one input or output file (optionally with a second file for two-file inputs).
//
struct SNAPFile {
    // Start with no file names, an unknown file type and every flag cleared.  isCompressed is
    // cleared here as well so it never holds an indeterminate value if it is read before some
    // other code assigns it.  Initializers are listed in member declaration order.
    SNAPFile()
        : fileName(NULL), secondFileName(NULL), fileType(UnknownFileType),
          isCompressed(false), isStdio(false), omitSQLines(false) {}

    const char *fileName;
    const char *secondFileName;
    FileType fileType;
    bool isCompressed;
    bool isStdio; // Only applies to the first file for two-file inputs
    bool omitSQLines; // Special (formerly) undocumented option for Charles Chiu's group. Mostly a bad idea.

    // Factory methods for read suppliers; defined outside this header.
    // NOTE(review): presumably these build a supplier that reads from this file -- confirm in the implementation.
    PairedReadSupplierGenerator *createPairedReadSupplierGenerator(int numThreads, bool quicklyDropUnpairedReads, const ReaderContext& context);
    ReadSupplierGenerator *createReadSupplierGenerator(int numThreads, const ReaderContext& context);

    // Defined outside this header.
    // NOTE(review): presumably fills in *snapFile from args[0..nArgs) and reports how many
    // arguments were used through *argsConsumed -- confirm in the implementation.
    static bool generateFromCommandLine(const char **args, int nArgs, int *argsConsumed, SNAPFile *snapFile, bool paired, bool isInput);
};
//
// A place to stick all of the various optimization-disable flags that are here to test aligner
// performance.
//
struct DisabledOptimizations {
    // One flag per optimization that can be switched off.
    bool noUkkonen;
    bool noOrderedEvaluation;
    bool noTruncation;
    bool noEditDistance;
    bool noBandedAffineGap;
    bool noMaxKForIndel;

    // By default nothing is disabled: every flag starts out false.
    DisabledOptimizations()
    {
        noUkkonen = false;
        noOrderedEvaluation = false;
        noTruncation = false;
        noEditDistance = false;
        noBandedAffineGap = false;
        noMaxKForIndel = false;
    }
}; // DisabledOptimizations
// When set, WriteStatusMessage does nothing.
extern bool g_suppressStatusMessages;

// When set, WriteErrorMessage does nothing.
extern bool g_suppressErrorMessages;
//
// Options common to single-end and paired-end alignment.  Implements the AbstractOptions
// interface (usageMessage/parse).  The constructor, usage(), usageMessage(), parse() and
// passFilter() are only declared here; their definitions are outside this header.
//
struct AlignerOptions : public AbstractOptions
{
// forPairedEnd defaults to false.
// NOTE(review): i_commandLine is presumably what ends up in commandLine -- confirm in the definition.
AlignerOptions(const char* i_commandLine, bool forPairedEnd = false);
const char *commandLine;
const char *indexDir;
const char *similarityMapFile;
int numThreads;
unsigned maxDist;
float maxDistFraction;
unsigned maxDistForIndels;
unsigned numSeedsFromCommandLine;
double seedCoverage; // Exclusive with numSeeds; this is readSize/seedSize
bool seedCountSpecified; // Has either -n or -sc been specified? This bool is used to make sure they're not both specified on the command line
unsigned maxHits;
int minWeightToCheck;
bool bindToProcessors;
bool ignoreMismatchedIDs;
SNAPFile outputFile;
// NOTE(review): inputs presumably points at nInputs SNAPFile entries; who allocates and frees
// the array is not visible in this header -- confirm.
int nInputs;
SNAPFile *inputs;
ReadClippingType clipping;
bool sortOutput;
bool noIndex;
bool noDuplicateMarking;
bool noQualityCalibration; // This doesn't appear to be used.
unsigned sortMemory; // total output sorting buffer size in Gb
// NOTE(review): presumably a bitwise OR of the FilterFlags values declared below -- confirm.
unsigned filterFlags;
bool explorePopularSeeds;
bool stopOnFirstHit;
bool useM; // Should we generate CIGAR strings using = and X, or using the old-style M?
unsigned gapPenalty; // if non-zero use gap penalty aligner
AbstractOptions *extra; // extra options
const char *rgLineContents;
const char *perfFileName;
bool useTimingBarrier;
unsigned extraSearchDepth;
const char *defaultReadGroup; // if not specified in input
bool ignoreSecondaryAlignments; // on input, default true
int maxSecondaryAlignmentAdditionalEditDistance;
int maxSecondaryAlignments;
int maxSecondaryAlignmentsPerContig;
int flattenMAPQAtOrBelow;
bool preserveClipping;
float expansionFactor;
DisabledOptimizations disabledOptimizations;
bool useAffineGap;
bool useSoftClipping;
unsigned matchReward;
unsigned subPenalty;
unsigned gapOpenPenalty;
unsigned gapExtendPenalty;
unsigned fivePrimeEndBonus;
unsigned threePrimeEndBonus;
unsigned minReadLength;
bool mapIndex;
bool prefetchIndex;
size_t writeBufferSize;
bool dropIndexBeforeSort;
bool killIfTooSlow;
const char * sortIntermediateDirectory;
bool profile;
bool profileAffineGap;
bool ignoreAlignmentAdjustmentsForOm;
bool emitInternalScore;
// Three chars of storage.
// NOTE(review): presumably a two-character tag plus a terminating NUL -- confirm.
char internalScoreTag[3];
bool altAwareness;
int maxScoreGapToPreferNonALTAlignment;
bool emitALTAlignments;
bool attachAlignmentTimes;
bool preserveFASTQComments;
// The two static members below are declarations only; each needs a definition outside this header.
static bool useHadoopErrorMessages; // This is static because it's global (and I didn't want to push the options object to every place in the code)
static bool outputToStdout; // Likewise
void usage();
virtual void usageMessage();
virtual bool parse(const char** argv, int argc, int& n, bool *done);
// Each enumerator is a distinct single bit, so the values can be combined in one mask.
enum FilterFlags
{
FilterUnaligned = 0x0001,
FilterSingleHit = 0x0002,
FilterMultipleHits = 0x0004,
FilterBothMatesMatch = 0x0008,
FilterTooShort = 0x0010
};
// Defined outside this header.
// NOTE(review): presumably checks this read/result against filterFlags -- confirm in the definition.
bool passFilter(Read* read, AlignmentResult result, bool tooShort, bool secondaryAlignment);
// Returns false here; virtual, so a derived options class can report otherwise.
virtual bool isPaired() { return false; }
};
|