File: FindMaxInterval.hpp

package info (click to toggle)
pbseqlib 5.3.5%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 7,020 kB
  • sloc: cpp: 77,250; python: 331; sh: 103; makefile: 41
file content (133 lines) | stat: -rw-r--r-- 5,936 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#ifndef _BLASR_FIND_MAX_INTERVAL_HPP_
#define _BLASR_FIND_MAX_INTERVAL_HPP_

#include <cmath>
#include <fstream>
#include <iostream>

#include <semaphore.h>

#include <alignment/algorithms/anchoring/BasicEndpoint.hpp>
#include <alignment/algorithms/anchoring/GlobalChain.hpp>
#include <alignment/algorithms/anchoring/LongestIncreasingSubsequence.hpp>
#include <alignment/datastructures/anchoring/ClusterList.hpp>
#include <alignment/datastructures/anchoring/MatchPos.hpp>
#include <alignment/datastructures/anchoring/WeightedInterval.hpp>
#include <alignment/statistics/VarianceAccumulator.hpp>

// Declaration only; the definition is not part of this header.
// NOTE(review): judging by the name and parameters, this presumably reports how
// many bases of an interval of length intervalLength remain beyond curPos --
// confirm against the definition before relying on that.
unsigned int NumRemainingBases(DNALength curPos, DNALength intervalLength);

// Option bundle handed by reference (as `params`) to FindMaxIncreasingInterval,
// FastFindMaxIncreasingInterval and ExhaustiveFindMaxIncreasingInterval, which
// are declared later in this header.
//
// Every member is public. The constructor is only declared here, so the
// default values it assigns cannot be read off this header.
//
// NOTE(review): the per-field notes below are inferred from the field names
// only; confirm each against the implementation before relying on it.
class IntervalSearchParameters
{
public:
    bool advanceHalf;             // presumably: step the search by half an interval -- TODO confirm
    int globalChainType;          // presumably: selects a global chaining variant -- TODO confirm
    float maxPValue;              // presumably: upper bound on an acceptable p-value -- TODO confirm
    float aboveCategoryPValue;    // meaning not evident from this header
    bool warp;                    // meaning not evident from this header
    bool fastMaxInterval;         // presumably: prefer the "Fast" search over the "Exhaustive" one -- TODO confirm
    bool aggressiveIntervalCut;   // meaning not evident from this header
    int verbosity;                // presumably: diagnostic output level -- TODO confirm
    float ddPValueThreshold;      // meaning not evident from this header
    // Declared only; defined outside this header.
    IntervalSearchParameters();
};

// Function-object type whose call operator maps (text, read, matchPosList) to a
// float. Its name suggests it is a default choice for a weight function such as
// the T_WeightFunction parameter used further down -- TODO confirm.
//
// The call operator is only declared here; its definition is presumably in
// FindMaxIntervalImpl.hpp, which this header includes at the end.
template <typename T_Sequence, typename T_AnchorList>
class DefaultWeightFunction
{
public:
    // text and read are taken by non-const reference; matchPosList is taken BY
    // VALUE, so each call copies the anchor list.
    // NOTE(review): the by-value parameter looks unintentional, but changing it
    // requires changing the out-of-class definition in lockstep.
    float operator()(T_Sequence &text, T_Sequence &read, T_AnchorList matchPosList);
};

// Function-object type whose call operator maps a T_Pos (by non-const
// reference) to an int.
// NOTE(review): the name suggests the int is a key for ordering matches by
// their position in the query -- confirm against the definition, which is not
// in this header.
template <typename T_Pos>
class MatchPosQueryOrderFunctor
{
public:
    int operator()(T_Pos &pos);
};

// Declaration only. Takes a match list plus five DNALength values and returns
// nothing.
// NOTE(review): presumably a diagnostic printer for a longest-increasing-
// subsequence result ("LIS"); the meaning of clp / cle is not evident from this
// header -- confirm against the definition.
template <typename T_MatchList>
void PrintLIS(T_MatchList &matchList, DNALength curPos, DNALength curGenomePos,
              DNALength nextGenomePos, DNALength clp, DNALength cle);

// Declaration only. Receives the match list by non-const reference, so the
// definition is able to modify it in place, together with a query length, a
// LIMS template length and a sequence database.
// NOTE(review): the filtering rule itself is not visible here; presumably
// matches that fall outside a region derived from limsTemplateLength are
// dropped -- confirm against the definition.
template <typename T_MatchList, typename T_SequenceDB>
void FilterMatchesAsLIMSTemplateSquare(T_MatchList &matches, DNALength queryLength,
                                       DNALength limsTemplateLength, T_SequenceDB &seqDB);

// Declaration only. The last two parameters (index, indexIntervalBoundary) are
// non-const references and are therefore the outputs; everything else except
// pos and SeqBoundary is passed by value.
// NOTE(review): from the names, this presumably starts at startIndex /
// startIntervalBoundary and moves forward through pos (nPos entries) until the
// position lies beyond an interval of intervalLength, respecting contig
// boundaries from SeqBoundary -- confirm against the definition.
template <typename T_MatchList, typename T_SequenceBoundaryDB>
void AdvanceIndexToPastInterval(T_MatchList &pos, DNALength nPos, DNALength intervalLength,
                                DNALength contigLength, T_SequenceBoundaryDB &SeqBoundary,
                                DNALength startIndex, DNALength startIntervalBoundary,
                                DNALength &index, DNALength &indexIntervalBoundary);

// Declaration only. The list is passed by non-const reference, so the
// definition can edit it in place.
// NOTE(review): presumably erases anchors whose length is zero; what the int
// return value reports (e.g. a count) is not visible here -- confirm.
template <typename T_MatchList>
int RemoveZeroLengthAnchors(T_MatchList &matchList);

// Declaration only. The list is passed by non-const reference, so the
// definition can edit it in place.
// NOTE(review): presumably erases anchors that overlap other anchors; the exact
// overlap criterion and the meaning of the int return value are not visible
// here -- confirm against the definition.
template <typename T_MatchList>
int RemoveOverlappingAnchors(T_MatchList &matchList);

// Declaration only. Returns an int computed from pos and the index pair
// (start, end).
// NOTE(review): presumably sums anchor lengths over the index range; whether
// `end` is inclusive or exclusive is not visible here -- confirm.
template <typename T_MatchList>
int SumAnchors(T_MatchList &pos, int start, int end);

// Declaration only. Results are delivered through the two std::vector<DNALength>
// reference parameters `start` and `end`; the function itself returns nothing.
// NOTE(review): presumably start[i] / end[i] delimit the i-th interval found in
// pos -- confirm against the definition.
template <typename T_MatchList, typename T_SequenceBoundaryDB>
void StoreLargestIntervals(T_MatchList &pos,
                           // Boundary positions at which the search for an
                           // interval ends. (The original comment called this
                           // "seqBoundaries"; the parameter is ContigStartPos.)
                           T_SequenceBoundaryDB &ContigStartPos,
                           // parameters
                           // How many values to search through for a max set.
                           DNALength intervalLength,
                           // Original comment: "How many sets to keep track of".
                           // NOTE(review): that wording does not obviously match a
                           // parameter named minSize -- confirm its meaning.
                           int minSize, std::vector<DNALength> &start, std::vector<DNALength> &end);

// Declaration only; the definition is presumably in FindMaxIntervalImpl.hpp,
// which is included at the end of this header.
//
// Shares its parameter list with FastFindMaxIncreasingInterval and
// ExhaustiveFindMaxIncreasingInterval, plus one extra trailing accumulator
// (accumNumAnchorBases) that those two do not take.
// NOTE(review): this is presumably the entry point that selects between the
// fast and exhaustive searches based on params -- confirm. The meaning of the
// int return value is not visible in this header.
template <typename T_MatchList, typename T_PValueFunction, typename T_WeightFunction,
          typename T_SequenceBoundaryDB, typename T_ReferenceSequence, typename T_Sequence>
int FindMaxIncreasingInterval(
    // Input
    // readDir is used to indicate if the interval that is being stored is
    // in the forward or reverse strand.  This is important later when
    // refining alignments so that the correct sequence is aligned back
    // to the reference.
    int readDir, T_MatchList &pos,
    // How many values to search through for a max set.
    DNALength intervalLength,
    // How many sets to keep track of
    VectorIndex nBest,
    // End search for intervals at boundary positions stored in
    // ContigStartPos (referred to as "seqBoundaries" in older comments).
    T_SequenceBoundaryDB &ContigStartPos,
    // First rank intervals by their p-value
    T_PValueFunction &MatchPValueFunction,
    // When ranking intervals, sum over weights determined by MatchWeightFunction
    T_WeightFunction &MatchWeightFunction,
    // Output.
    // The increasing interval coordinates,
    // in order by queue weight.
    WeightedIntervalSet &intervalQueue, T_ReferenceSequence &reference, T_Sequence &query,
    IntervalSearchParameters &params,
    // chainEndpointBuffer is the only pointer parameter.
    // NOTE(review): whether it may be null is not visible here -- confirm.
    std::vector<BasicEndpoint<ChainedMatchPos> > *chainEndpointBuffer, ClusterList &clusterList,
    // Accumulators passed by non-const reference.
    // NOTE(review): presumably updated with per-interval statistics -- confirm.
    VarianceAccumulator<float> &accumPValue, VarianceAccumulator<float> &accumWeight,
    VarianceAccumulator<float> &accumNumAnchorBases);

// Declaration only; the definition is presumably in FindMaxIntervalImpl.hpp.
//
// Takes the same parameters, in the same order, as FindMaxIncreasingInterval
// except that it has no accumNumAnchorBases accumulator; see the comments on
// that declaration for the documented meaning of readDir, intervalLength, nBest,
// ContigStartPos, the two functors and intervalQueue.
// NOTE(review): the name indicates a faster search strategy than the
// "Exhaustive" sibling; how the two differ and what the int return value means
// are not visible in this header -- confirm.
template <typename T_MatchList, typename T_PValueFunction, typename T_WeightFunction,
          typename T_SequenceBoundaryDB, typename T_ReferenceSequence, typename T_Sequence>
int FastFindMaxIncreasingInterval(
    int readDir, T_MatchList &pos, DNALength intervalLength, VectorIndex nBest,
    T_SequenceBoundaryDB &ContigStartPos, T_PValueFunction &MatchPValueFunction,
    T_WeightFunction &MatchWeightFunction, WeightedIntervalSet &intervalQueue,
    T_ReferenceSequence &reference, T_Sequence &query, IntervalSearchParameters &params,
    std::vector<BasicEndpoint<ChainedMatchPos> > *chainEndpointBuffer, ClusterList &clusterList,
    VarianceAccumulator<float> &accumPValue, VarianceAccumulator<float> &accumWeight);

// Declaration only; the definition is presumably in FindMaxIntervalImpl.hpp.
//
// Signature is identical to FastFindMaxIncreasingInterval (same parameter types
// and order, no accumNumAnchorBases accumulator); see the comments on
// FindMaxIncreasingInterval for the documented meaning of readDir,
// intervalLength, nBest, ContigStartPos, the two functors and intervalQueue.
// NOTE(review): the name indicates a more thorough search than the "Fast"
// sibling; how the two differ and what the int return value means are not
// visible in this header -- confirm.
template <typename T_MatchList, typename T_PValueFunction, typename T_WeightFunction,
          typename T_SequenceBoundaryDB, typename T_ReferenceSequence, typename T_Sequence>
int ExhaustiveFindMaxIncreasingInterval(
    int readDir, T_MatchList &pos, DNALength intervalLength, VectorIndex nBest,
    T_SequenceBoundaryDB &ContigStartPos, T_PValueFunction &MatchPValueFunction,
    T_WeightFunction &MatchWeightFunction, WeightedIntervalSet &intervalQueue,
    T_ReferenceSequence &reference, T_Sequence &query, IntervalSearchParameters &params,
    std::vector<BasicEndpoint<ChainedMatchPos> > *chainEndpointBuffer, ClusterList &clusterList,
    VarianceAccumulator<float> &accumPValue, VarianceAccumulator<float> &accumWeight);

#include "FindMaxIntervalImpl.hpp"
#endif