File: AlignerOptions.h

package info (click to toggle)
snap-aligner 1.0~beta.18%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 9,036 kB
  • ctags: 4,550
  • sloc: cpp: 106,701; ansic: 5,239; python: 227; makefile: 80
file content (129 lines) | stat: -rw-r--r-- 4,565 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/*++

Module Name:

    AlignerOptions.h

Abstract:

    Common parameters for running single & paired alignment.

Authors:

    Ravi Pandya, May, 2012

Environment:
`
    User mode service.

Revision History:

    Integrated from SingleAligner.cpp & PairedAligner.cpp

--*/

#pragma once

#include "stdafx.h"
#include "options.h"
#include "Genome.h"
#include "Read.h"

#define MAPQ_LIMIT_FOR_SINGLE_HIT 10

struct AbstractOptions
{
    virtual void usageMessage() = 0;

    virtual bool parse(const char** argv, int argc, int& n, bool *done) = 0;
};

enum FileType {UnknownFileType, SAMFile, FASTQFile, BAMFile, InterleavedFASTQFile, CRAMFile};  // Add more as needed

struct SNAPFile {
	SNAPFile() : fileName(NULL), secondFileName(NULL), fileType(UnknownFileType), isStdio(false), omitSQLines(false) {}
    const char          *fileName;
    const char          *secondFileName;
    FileType             fileType;
    bool                 isCompressed;
    bool                 isStdio;           // Only applies to the first file for two-file inputs
	bool				 omitSQLines;		// Special undocumented option for Charles Chiu's group.  Mostly a bad idea.

    PairedReadSupplierGenerator *createPairedReadSupplierGenerator(int numThreads, bool quicklyDropUnpairedReads, const ReaderContext& context);
    ReadSupplierGenerator *createReadSupplierGenerator(int numThreads, const ReaderContext& context);
    static bool generateFromCommandLine(const char **args, int nArgs, int *argsConsumed, SNAPFile *snapFile, bool paired, bool isInput);
};

struct AlignerOptions : public AbstractOptions
{
    AlignerOptions(const char* i_commandLine, bool forPairedEnd = false);

    const char         *commandLine;
    const char         *indexDir;
    const char         *similarityMapFile;
    int                 numThreads;
    unsigned            maxDist;
    float               maxDistFraction;
    unsigned            numSeedsFromCommandLine;
    double              seedCoverage;       // Exclusive with numSeeds; this is readSize/seedSize
    bool                seedCountSpecified; // Has either -n or -sc been specified?  This bool is used to make sure they're not both specified on the command line
    unsigned            maxHits;
    int                 minWeightToCheck;
    bool                bindToProcessors;
    bool                ignoreMismatchedIDs;
    SNAPFile            outputFile;
    int                 nInputs;
    SNAPFile           *inputs;
    ReadClippingType    clipping;
    bool                sortOutput;
    bool                noIndex;
    bool                noDuplicateMarking;
    bool                noQualityCalibration;
    unsigned            sortMemory; // total output sorting buffer size in Gb
    unsigned            filterFlags;
    bool                explorePopularSeeds;
    bool                stopOnFirstHit;
	bool				useM;	// Should we generate CIGAR strings using = and X, or using the old-style M?
    unsigned            gapPenalty; // if non-zero use gap penalty aligner
    AbstractOptions    *extra; // extra options
    const char         *rgLineContents;
    const char         *perfFileName;
    bool                useTimingBarrier;
    unsigned            extraSearchDepth;
    const char         *defaultReadGroup; // if not specified in input
    bool                ignoreSecondaryAlignments; // on input, default true
    int                 maxSecondaryAlignmentAdditionalEditDistance;
	int					maxSecondaryAlignments;
    int                 maxSecondaryAlignmentsPerContig;
    bool                preserveClipping;
    float               expansionFactor;
    bool                noUkkonen;
    bool                noOrderedEvaluation;
	bool				noTruncation;
	unsigned			minReadLength;
	bool				mapIndex;
	bool				prefetchIndex;
    size_t              writeBufferSize;
    
    static bool         useHadoopErrorMessages; // This is static because it's global (and I didn't want to push the options object to every place in the code)
    static bool         outputToStdout;         // Likewise

    void usage();

    virtual void usageMessage();

    virtual bool parse(const char** argv, int argc, int& n, bool *done);

    enum FilterFlags
    {
        FilterUnaligned =           0x0001,
        FilterSingleHit =           0x0002,
        FilterMultipleHits =        0x0004,
        FilterBothMatesMatch =      0x0008,
		FilterTooShort =            0x0010
    };

    bool passFilter(Read* read, AlignmentResult result, bool tooShort);
    
    virtual bool isPaired() { return false; }
};