File: AlignerOptions.h

package info (click to toggle)
snap-aligner 2.0.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 6,648 kB
  • sloc: cpp: 41,013; ansic: 5,239; python: 227; makefile: 85; sh: 27
file content (186 lines) | stat: -rw-r--r-- 7,486 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
/*++

Module Name:

    AlignerOptions.h

Abstract:

    Common parameters for running single & paired alignment.

Authors:

    Ravi Pandya, May, 2012

Environment:

    User mode service.

Revision History:

    Integrated from SingleAligner.cpp & PairedAligner.cpp

--*/

#pragma once

#include "stdafx.h"
#include "options.h"
#include "Genome.h"
#include "Read.h"



//
// Instrumentation globals.  They are only declared when the build defines INSTRUMENTATION_FOR_PAPER
// to a non-zero value; these are extern declarations, so the storage itself lives in another
// translation unit.
//
#if INSTRUMENTATION_FOR_PAPER

#define MAX_HIT_SIZE_LOG_2  15  // This is for the instrumentation
extern _int64 g_alignmentTimeByHitCountsOfEachSeed[MAX_HIT_SIZE_LOG_2+1][MAX_HIT_SIZE_LOG_2+1];  // In the paired-end aligner, if you have seeds A and B with hit set sizes |A| and |B| then the total time in ns gets added into g_alignmentTimeByHitCountsOfEachSeed[log2(|A|)][log2(|B|)]
extern _int64 g_alignmentCountByHitCountsOfEachSeed[MAX_HIT_SIZE_LOG_2 + 1][MAX_HIT_SIZE_LOG_2 + 1];  // Same as above, but just add one per time.
// NOTE(review): the three arrays below have the same dimensions as the two above and are presumably
// indexed the same way ([log2(|A|)][log2(|B|)]) -- confirm against the code that updates them.
extern _int64 g_scoreCountByHitCountsOfEachSeed[MAX_HIT_SIZE_LOG_2 + 1][MAX_HIT_SIZE_LOG_2 + 1];
extern _int64 g_setIntersectionSizeByHitCountsOfEachSeed[MAX_HIT_SIZE_LOG_2 + 1][MAX_HIT_SIZE_LOG_2 + 1];
extern _int64 g_100xtotalRatioOfSetIntersectionSizeToSmallerSeedHitCountByCountsOfEachSeed[MAX_HIT_SIZE_LOG_2 + 1][MAX_HIT_SIZE_LOG_2 + 1];
// Scalar totals (not broken down by hit count).
extern _int64 g_totalSizeOfSmallerHitSet;
extern _int64 g_totalSizeOfSetIntersection;
extern _int64 g_alignmentsWithMoreThanOneCandidateWhereTheBestCandidateIsScoredFirst;
extern _int64 g_alignmentsWithMoreThanOneCandidate;

#endif // INSTRUMENTATION_FOR_PAPER

// NOTE(review): presumably the MAPQ threshold used to decide whether an alignment counts as a
// "single hit" (cf. AlignerOptions::FilterSingleHit) -- confirm against the users of this macro.
#define MAPQ_LIMIT_FOR_SINGLE_HIT 10

//
// Interface for a group of command-line options: an implementation knows how to print its own
// usage text and how to parse its own arguments.
//
struct AbstractOptions
{
    //
    // Objects of derived types are held through AbstractOptions pointers (e.g., AlignerOptions::extra),
    // so the destructor must be virtual for deletion through such a pointer to be well defined.
    //
    virtual ~AbstractOptions() {}

    // Print the usage text for the options handled by this object.
    virtual void usageMessage() = 0;

    //
    // Parse command-line arguments.
    //   argv, argc - the argument vector and its length
    //   n          - in/out position within argv (NOTE(review): presumably the index of the argument
    //                being examined, advanced by the implementation when it consumes extra values -- confirm)
    //   done       - out flag (NOTE(review): presumably set to tell the caller to stop parsing -- confirm)
    // Returns a bool whose exact meaning is defined by the implementations
    // (NOTE(review): presumably true when the argument was recognized -- confirm).
    //
    virtual bool parse(const char** argv, int argc, int& n, bool *done) = 0;
};

// The file formats a SNAPFile can be tagged with.  Append new enumerators as they are needed.
enum FileType {
    UnknownFileType,
    SAMFile,
    FASTQFile,
    BAMFile,
    InterleavedFASTQFile,
    CRAMFile
};

//
// Describes one input or output file (or pair of files) named on the command line.
//
struct SNAPFile {
    //
    // Every member gets a defined value here.  isCompressed used to be left out of the initializer
    // list, which meant reading it from a default-constructed SNAPFile read an indeterminate value.
    //
    SNAPFile() : fileName(NULL), secondFileName(NULL), fileType(UnknownFileType), isCompressed(false), isStdio(false), omitSQLines(false) {}

    const char          *fileName;          // NULL until filled in
    const char          *secondFileName;    // Second file of a two-file input; NULL until filled in
    FileType             fileType;
    bool                 isCompressed;
    bool                 isStdio;           // Only applies to the first file for two-file inputs
    bool                 omitSQLines;       // Special (formerly) undocumented option for Charles Chiu's group.  Mostly a bad idea.

    // Factories for the read suppliers that read this file; defined elsewhere.
    PairedReadSupplierGenerator *createPairedReadSupplierGenerator(int numThreads, bool quicklyDropUnpairedReads, const ReaderContext& context);
    ReadSupplierGenerator *createReadSupplierGenerator(int numThreads, const ReaderContext& context);

    //
    // Fills in *snapFile from the command-line arguments in args[0..nArgs) and reports through
    // *argsConsumed how many were used.  NOTE(review): presumably returns false when the arguments
    // don't describe a valid file -- confirm against the definition.
    //
    static bool generateFromCommandLine(const char **args, int nArgs, int *argsConsumed, SNAPFile *snapFile, bool paired, bool isInput);
};

//
// A place to stick all of the various optimization disable flags that are here to test aligner
// performance.
//
struct DisabledOptimizations {
    // Nothing is disabled by default: every flag starts out false.
    DisabledOptimizations()
        : noUkkonen(false),
          noOrderedEvaluation(false),
          noTruncation(false),
          noEditDistance(false),
          noBandedAffineGap(false),
          noMaxKForIndel(false)
    {
    }

    // One switch per optimization; true means that optimization is turned off.
    bool noUkkonen;
    bool noOrderedEvaluation;
    bool noTruncation;
    bool noEditDistance;
    bool noBandedAffineGap;
    bool noMaxKForIndel;
}; // DisabledOptimizations

// Global message-suppression switches.  These are declarations only; the variables are defined elsewhere.
extern bool g_suppressStatusMessages; // Setting this causes WriteStatusMessage not to do anything.
extern bool g_suppressErrorMessages; // Setting this causes WriteErrorMessage not to do anything.

//
// The parameters shared by the single-end and paired-end aligners.  The constructor and the
// non-inline member functions are defined elsewhere.
//
struct AlignerOptions : public AbstractOptions
{
    AlignerOptions(const char* i_commandLine, bool forPairedEnd = false);

    //
    // Index, threading and search parameters.
    //
    const char *commandLine;
    const char *indexDir;
    const char *similarityMapFile;
    int numThreads;
    unsigned maxDist;
    float maxDistFraction;
    unsigned maxDistForIndels;
    unsigned numSeedsFromCommandLine;
    double seedCoverage;                // readSize/seedSize; mutually exclusive with numSeeds
    bool seedCountSpecified;            // Set once -n or -sc has been seen, so that specifying both can be rejected
    unsigned maxHits;
    int minWeightToCheck;
    bool bindToProcessors;
    bool ignoreMismatchedIDs;

    //
    // Input and output files and how the output is processed.
    //
    SNAPFile outputFile;
    int nInputs;
    SNAPFile *inputs;                   // NOTE(review): presumably an array of nInputs elements -- confirm
    ReadClippingType clipping;
    bool sortOutput;
    bool noIndex;
    bool noDuplicateMarking;
    bool noQualityCalibration;          // Appears to be unused.
    unsigned sortMemory;                // Total size of the output sorting buffer, in Gb
    unsigned filterFlags;               // NOTE(review): presumably an OR of FilterFlags values -- confirm
    bool explorePopularSeeds;
    bool stopOnFirstHit;
    bool useM;                          // CIGAR strings with old-style M (true), or with = and X
    unsigned gapPenalty;                // Non-zero selects the gap penalty aligner
    AbstractOptions *extra;             // Additional options
    const char *rgLineContents;
    const char *perfFileName;
    bool useTimingBarrier;
    unsigned extraSearchDepth;
    const char *defaultReadGroup;       // Used when the input doesn't specify one
    bool ignoreSecondaryAlignments;     // Applies to input; defaults to true
    int maxSecondaryAlignmentAdditionalEditDistance;
    int maxSecondaryAlignments;
    int maxSecondaryAlignmentsPerContig;
    int flattenMAPQAtOrBelow;
    bool preserveClipping;
    float expansionFactor;
    DisabledOptimizations disabledOptimizations;

    //
    // Affine gap scoring.
    //
    bool useAffineGap;
    bool useSoftClipping;
    unsigned matchReward;
    unsigned subPenalty;
    unsigned gapOpenPenalty;
    unsigned gapExtendPenalty;
    unsigned fivePrimeEndBonus;
    unsigned threePrimeEndBonus;

    //
    // Everything else.
    //
    unsigned minReadLength;
    bool mapIndex;
    bool prefetchIndex;
    size_t writeBufferSize;
    bool dropIndexBeforeSort;
    bool killIfTooSlow;
    const char *sortIntermediateDirectory;
    bool profile;
    bool profileAffineGap;
    bool ignoreAlignmentAdjustmentsForOm;
    bool emitInternalScore;
    char internalScoreTag[3];
    bool altAwareness;
    int maxScoreGapToPreferNonALTAlignment;
    bool emitALTAlignments;
    bool attachAlignmentTimes;
    bool preserveFASTQComments;

    //
    // These two are static because they're effectively global, which avoids having to pass the
    // options object to every place in the code that needs them.
    //
    static bool useHadoopErrorMessages;
    static bool outputToStdout;

    void usage();

    // AbstractOptions interface.
    virtual void usageMessage();

    virtual bool parse(const char** argv, int argc, int& n, bool *done);

    // Bit values, each a distinct single bit.
    enum FilterFlags
    {
        FilterUnaligned         = 0x0001,
        FilterSingleHit         = 0x0002,
        FilterMultipleHits      = 0x0004,
        FilterBothMatesMatch    = 0x0008,
        FilterTooShort          = 0x0010
    };

    bool passFilter(Read* read, AlignmentResult result, bool tooShort, bool secondaryAlignment);

    // This base implementation always answers false; it's virtual so a derived options class can override it.
    virtual bool isPaired()
    {
        return false;
    }
};