File: summary.hpp

package info (click to toggle)
mothur 1.48.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 13,692 kB
  • sloc: cpp: 161,866; makefile: 122; sh: 31
file content (163 lines) | stat: -rwxr-xr-x 8,373 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
//
//  summary.hpp
//  Mothur
//
//  Created by Sarah Westcott on 3/27/17.
//  Copyright © 2017 Schloss Lab. All rights reserved.
//

#ifndef summary_hpp
#define summary_hpp

#include "mothurout.h"
#include "sequence.hpp"
#include "counttable.h"

class Summary {

public:

#ifdef UNIT_TEST
    friend class TestSummary;
#endif

    Summary(int p) { processors = p; m = MothurOut::getInstance(); total = 0; numUniques = 0; hasNameOrCount = false; nameCountNumUniques = 0; type = "count"; }
    ~Summary() = default;

    long long summarizeFasta(string f, string n, string o); //provide fasta file to summarize (paralellized) and optional nameorCountfile and optional outputfile for individual seqs info. To skip nameCount or output file, n="" and / or o=""
    long long summarizeFasta(string f, string o); //provide fasta file to summarize (paralellized) and optional outputfile for individual seqs info. To skip output file, o=""
    long long summarizeFastaSummary(string f); //provide summary of fasta file to summarize (paralellized)
    long long summarizeFastaSummary(string f, string n); //provide summary of fasta file and name or count file to summarize (paralellized)
    long long summarizeContigsSummary(string f); //provide summary of contigs summary file to summarize (paralellized)
    long long summarizeContigsSummary(string f, string n); //provide summary of contigs summary file and name or count file to summarize (paralellized)
    long long summarizeAlignSummary(string f); //provide summary of contigs summary file to summarize (paralellized)
    long long summarizeAlignSummary(string f, string n); //provide summary of contigs summary file and name or count file to summarize (paralellized)

    vector<long long> getDefaults();
    //fasta and summary
    vector<long long> getStart() { return (getValues(startPosition)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean)
    long long getStart(double value) { return (getValue(startPosition, value)); } //2.5 = 2.5% of sequences of sequences start before, 25 = location 25% of sequences start before
    vector<long long> getEnd() { return (getValues(endPosition)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean)
    long long getEnd(double value) { return (getValue(endPosition, value)); } //2.5 = 2.5% of sequences of sequences end after, 25 = location 25% of sequences end after
    vector<long long> getAmbig() { return (getValues(ambigBases)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean)
    long long getAmbig(double value) { return (getValue(ambigBases, value)); } //25 = max abigous bases 25% of sequences contain
    vector<long long> getLength() { return (getValues(seqLength)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean)
    long long getLength(double value) { return (getValue(seqLength, value)); } // 25 = min length of 25% of sequences
    vector<long long> getHomop() { return (getValues(longHomoPolymer)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean)
    long long getHomop(double value) { return (getValue(longHomoPolymer, value)); }

    //contigs
    vector<long long> getOStart() { return (getValues(ostartPosition)); } //contigs overlap start - returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean)
    long long getOStart(double value) { return (getValue(ostartPosition, value)); } //contigs overlap start - 2.5 = 2.5% of sequences of sequences start before, 25 = location 25% of sequences start before
    vector<long long> getOEnd() { return (getValues(oendPosition)); } //contigs overlap end -returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean)
    long long getOEnd(double value) { return (getValue(oendPosition, value)); } //contigs overlap end -2.5 = 2.5% of sequences of sequences end after, 25 = location 25% of sequences end after
    vector<long long> getOLength() { return (getValues(oseqLength)); } //contigs overlap length - returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean)
    long long getOLength(double value) { return (getValue(oseqLength, value)); } //contigs overlap length - 25 = min length of 25% of sequences
    vector<long long> getMisMatches() { return (getValues(misMatches)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean)
    long long getMisMatches(double value) { return (getValue(misMatches, value)); }
    vector<long long> getNumNs() { return (getValues(numNs)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean)
    long long getNumNs(double value) { return (getValue(numNs, value)); } //25 = max abigous bases 25% of sequences contain
    vector<long long> getSims() { return (getValues(sims)); } //contigs overlap length - returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean)
    long long getSims(double value) { return (getValue(sims, value)); } //contigs overlap length - 25 = min length of 25% of sequences
    vector<long long> getScores() { return (getValues(scores)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean)
    long long getScores(double value) { return (getValue(scores, value)); }
    vector<long long> getNumInserts() { return (getValues(inserts)); } //returns vector of 8 locations. (min, 2.5, 25, 50, 75, 97.5, max, mean)
    long long getNumInserts(double value) { return (getValue(inserts, value)); } //25 = max abigous bases 25% of sequences contain
		int getMaxAbundance();

    long long getTotalSeqs() { return total; }
    long long getUniqueSeqs() { return numUniques; }

private:

    MothurOut* m;
    Utils util;
    int processors;
    long long total, numUniques, nameCountNumUniques;
    bool hasNameOrCount;
    string type;
    map<int, long long> startPosition;
    map<int, long long> endPosition;
    map<int, long long> seqLength;
    map<int, long long> ambigBases;
    map<int, long long> longHomoPolymer;
    map<int, long long> ostartPosition;
    map<int, long long> oendPosition;
    map<int, long long> oseqLength;
    map<int, long long> misMatches;
    map<int, long long> numNs;
    map<float, long long> sims;
    map<float, long long> scores;
    map<int, long long> inserts;

    map<string, int> nameMap;
    map<int, long long>::iterator it;

    void processNameCount(string n); //determines whether name or count and fills nameMap, ignored if n = ""
    vector<long long> getValues(map<int, long long>& positions);
    long long getValue(map<int, long long>& positions, double);
    vector<long long> getValues(map<float, long long>& positions);
    long long getValue(map<float, long long>& positions, double);
    bool isCountFile(string);



};
/**************************************************************************************************/
struct seqSumData {
    map<int, long long> startPosition;
    map<int, long long> endPosition;
    map<int, long long> seqLength;
    map<int, long long> ambigBases;
    map<int, long long> longHomoPolymer;
    map<int, long long> ostartPosition;
    map<int, long long> oendPosition;
    map<int, long long> oseqLength;
    map<int, long long> misMatches;
    map<int, long long> numNs;
    map<float, long long> sims;
    map<float, long long> scores;
    map<int, long long> inserts;


    string filename, summaryFile, contigsfile, output;
    double start;
    double end;
    long long count;
    long long total;
    MothurOut* m;
    bool hasNameMap;
    map<string, int> nameMap;
    Utils util;


    seqSumData(){}
    //FastaSummarize - output file created
    seqSumData(string f, string sum, double st, double en, bool na, map<string, int> nam) {
        filename = f;
        m = MothurOut::getInstance();
        start = st;
        end = en;
        hasNameMap = na;
        nameMap = nam;
        count = 0;
        total = 0;
        summaryFile = sum;
    }

    //FastaSummarySummarize - no output files
    seqSumData(string f, double st, double en, bool na, map<string, int> nam) {
        filename = f;
        m = MothurOut::getInstance();
        start = st;
        end = en;
        hasNameMap = na;
        nameMap = nam;
        count = 0;
        total = 0;
    }
};

/**************************************************************************************************/

#endif /* summary_hpp */