File: optidata.hpp

package info (click to toggle)
mothur 1.48.5-1
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 13,684 kB
  • sloc: cpp: 161,854; makefile: 122; sh: 31
file content (86 lines) | stat: -rw-r--r-- 3,710 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
//
//  optidata.hpp
//  Mothur
//
//  Created by Sarah Westcott on 5/10/18.
//  Copyright © 2018 Schloss Lab. All rights reserved.
//

#ifndef optidata_hpp
#define optidata_hpp

#include "mothurout.h"
#include "listvector.hpp"
#include "sparsedistancematrix.h"
#include "counttable.h"


/*
 #ifdef UNIT_TEST
     friend class TestOptiMatrix;
     friend class FakeOptiMatrix;
 #endif
     
 */
class OptiData {
public:
    
    OptiData(double c)  { m = MothurOut::getInstance(); cutoff = c; }
    virtual ~OptiData(){}
    
    set<long long> getCloseSeqs(long long i);
    bool isClose(long long, long long);
    long long getNumClose(long long);
    map<string, long long> getNameIndexMap();
    string getName(long long); //name from nameMap index
    set<string> getNames(set<long long>); //name from nameMap index
    
    long long getNumSeqs() { return closeness.size(); }
    long long getNumSingletons() { return singletons.size(); }
    virtual long long getNumDists(); //number of distances under cutoff
    ListVector* getListSingle();
    
    //for mgcluster - reading blast files
    virtual vector< set<long long> > getBlastOverlap() { vector< set<long long> > blank; return blank; }
    virtual string getOverlapName(long long) { return ""; } //name from nameMap index
    
    virtual void randomizeRefs(){};
    virtual vector<string> getRefSingletonNames() { vector<string> temp; return temp;  }
    virtual vector<long long> getTranslatedBins(vector<vector<string> >&, vector< vector<long long> >&) { vector<long long> temp; return temp;  }
    virtual OptiData* extractRefMatrix() { OptiData* temp = nullptr; return temp;  }
    virtual OptiData* extractMatrixSubset(set<long long>&) { OptiData* temp = nullptr; return temp;  }
    virtual OptiData* extractMatrixSubset(set<string>&) { OptiData* temp = nullptr; return temp;  }
    virtual long long getNumFitSingletons() { return 0; } //user singletons
    virtual long long getNumFitDists() { return 0; } //user distances under cutoff
    virtual long long getNumRefDists() { return 0; } //ref distances under cutoff
    
    virtual ListVector* getFitListSingle() { ListVector* list = nullptr; return list; }
    virtual long long getNumFitTrueSingletons() { return 0; }
    
    virtual vector<long long> getRefSeqs() { vector<long long> temp; return temp;  }
    virtual vector<long long> getFitSeqs() { vector<long long> temp; return temp;  }
    virtual long long getNumFitSeqs() { return 0; }
    virtual long long getNumFitClose(long long) { return 0;  }
    virtual long long getNumRefClose(long long) { return 0;  }
    virtual set<long long> getCloseFitSeqs(long long i) { set<long long> temp; return temp;  }
    virtual set<long long> getCloseRefSeqs(long long i) { set<long long> temp; return temp;  }
    virtual bool isCloseFit(long long j, long long i, bool&) { return false; }
    virtual long long print(ostream&);
    
    // in the case of all distances being below the cutoff, the TN and FP will always be 0
    // This is because nothing is considered "far apart". The mcc score will always be 0.
    // In this case we need to select a different calculator to cluster
    bool mccValidCalc();
    
protected:
    Utils util; MothurOut* m;
    vector< set<long long> > closeness;  //closeness[0] contains indexes of seqs "close" to seq 0.
    vector<string> singletons; //name of seqs with NO distances in matrix, if name file is given then it contains 2nd column of namefile
    vector<string> nameMap;  //name of seqs with distances in matrix, if name file is given then it contains 2nd column of namefile
    double cutoff;
    
    set<long long> getIndexes(set<string> seqs);
};


#endif /* optidata_hpp */