File: lefsecommand.h

package info (click to toggle)
mothur 1.48.5-1
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 13,684 kB
  • sloc: cpp: 161,854; makefile: 122; sh: 31
file content (82 lines) | stat: -rwxr-xr-x 4,855 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
//
//  lefsecommand.h
//  Mothur
//
//  Created by SarahsWork on 6/12/13.
//  Copyright (c) 2013 Schloss Lab. All rights reserved.
//

#ifndef __Mothur__lefsecommand__
#define __Mothur__lefsecommand__

#include "command.hpp"

/* 
 Columns = groups, rows are OTUs, class = design
 
 From http://huttenhower.sph.harvard.edu/galaxy/root?tool_id=lefse_upload
 Input data consist of a collection of m samples (columns) each made up of n numerical features (rows, typically normalized per-sample, red representing high values and green low). These samples are labeled with a class (taking two or more possible values) that represents the main biological hypothesis under investigation; they may also have one or more subclass labels reflecting within-class groupings.
 
 Step 1: the Kruskal-Wallis test analyzes all features, testing whether the values in different classes are differentially distributed. Features violating the null hypothesis are further analyzed in Step 2.
 Step 2: the pairwise Wilcoxon test checks whether all pairwise comparisons between subclasses within different classes significantly agree with the class level trend.
 Step 3: the resulting subset of vectors is used to build a Linear Discriminant Analysis model from which the relative difference among classes is used to rank the features. The final output thus consists of a list of features that are discriminative with respect to the classes, consistent with the subclass grouping within classes, and ranked according to the effect size with which they differentiate classes.
*/


#include "command.hpp"
#include "inputdata.h"
#include "designmap.h"
#include "sharedclrvectors.hpp"

/**************************************************************************************************/

/*
 Command class registered under the name "lefse" (see getCommandName()).

 The overview comment near the top of this file describes the three LEfSe
 stages (Kruskal-Wallis, pairwise Wilcoxon, Linear Discriminant Analysis).
 The private helpers declared below are named after those stages; their
 bodies live in the implementation file and are not visible from this header,
 so the notes on them are limited to what the declarations themselves show.
*/
class LefseCommand : public Command {
public:
    LefseCommand(string);
    ~LefseCommand() {}

    vector<string> setParameters();

    // Fixed identifiers reported for this command.
    string getCommandName() {
        return "lefse";
    }
    string getCommandCategory() {
        return "OTU-Based Approaches";
    }

    string getOutputPattern(string);
    string getHelpString();

    // Literature reference reported for this command.
    string getCitation() {
        return "Segata, N., J. Izard, L. Waldron, D. Gevers, L. Miropolsky, W. S. Garrett, and C. Huttenhower. 2011. Metagenomic biomarker discovery and explanation. Genome Biol 12:R60, http://www.mothur.org/wiki/Lefse";
    }

    // NOTE(review): the returned text still reads like a placeholder.
    string getDescription() {
        return "brief description";
    }

    int execute();

    // Writes the help text through m->mothurOut(). `m` is not declared in
    // this class; presumably it is provided by Command -- confirm.
    void help() {
        m->mothurOut(getHelpString());
    }

private:
    // Boolean state. Declared one per line, in the original order; meanings
    // are assigned in the implementation file.
    bool abort;
    bool allLines;
    bool wilc;
    bool wilcsamename;
    bool curv;
    bool subject;
    bool normMillion;
    bool pairwise;
    bool runAll;

    // String state (file names and option values, judging by the names --
    // confirm against the implementation).
    string sharedfile;
    string designfile;
    string mclass;
    string subclass;
    string rankTec;
    string multiClassStrat;
    string sets;
    string inputfile;
    string clrfile;
    string format;

    vector<string> outputNames;
    vector<string> Sets;
    set<string> labels;

    // Numeric settings.
    double anovaAlpha;
    double wilcoxonAlpha;
    double fBoots;
    double ldaThreshold;

    int nlogs;
    int iters;
    int strict;
    int minC;

    // Analysis helpers. Most take both a SharedRAbundFloatVectors*& and a
    // SharedCLRVectors*&; which of the two is actually used is decided in the
    // implementation file.
    int process(SharedRAbundFloatVectors*&,
                SharedCLRVectors*&,
                DesignMap&,
                string combo);

    int normalize(SharedRAbundFloatVectors*&,
                  SharedCLRVectors*&);

    map<int, double> runKruskalWallis(SharedRAbundFloatVectors*&,
                                      SharedCLRVectors*&,
                                      DesignMap&);

    map<int, double> runWilcoxon(SharedRAbundFloatVectors*&,
                                 SharedCLRVectors*&,
                                 map<int, double>,
                                 map<string, set<string> >& class2SubClasses,
                                 map<string, vector<int> >& subClass2GroupIndex,
                                 map<string, string>);

    map<int, double> testLDA(SharedRAbundFloatVectors*&,
                             SharedCLRVectors*&,
                             map<int, double>,
                             map<string, vector<int> >& class2GroupIndex,
                             map<string, vector<int> >&);

    vector< vector<double> > getMeans(SharedRAbundFloatVectors*& lookup,
                                      SharedCLRVectors*& clr,
                                      map<string, vector<int> >& class2GroupIndex);

    // The spelling "contast" is kept as-is: the name is part of the
    // interface the implementation file defines.
    bool contastWithinClassesOrFewPerClass(vector< vector<double> >&,
                                           vector<int> rands,
                                           int minCl,
                                           map<string, vector<int> > class2GroupIndex,
                                           map<int, string> indexToClass);

    vector< vector<double> > lda(vector< vector<double> >& adjustedLookup,
                                 vector<int> rand_s,
                                 map<int, string>& indexToClass,
                                 vector<string>);

    bool testOTUWilcoxon(map<string, set<string> >& class2SubClasses,
                         vector<float> abunds,
                         map<string, vector<int> >& subClass2GroupIndex,
                         map<string, string>);

    // Two output routines with identical parameter lists.
    void printResults(vector< vector<double> >,
                      map<int, double>,
                      map<int, double>,
                      string,
                      vector<string>,
                      vector<string>,
                      string);

    void printResultsAll(vector< vector<double> >,
                         map<int, double>,
                         map<int, double>,
                         string,
                         vector<string>,
                         vector<string>,
                         string);

    // Marked "for testing" by the original author.
    bool printToCoutForRTesting(vector< vector<double> >& adjustedLookup,
                                vector<int> rand_s,
                                map<string, vector<int> >& class2GroupIndex,
                                map<int, double> bins,
                                map<string, vector<int> >&,
                                vector<string>,
                                vector<string>);

    int makeShared(int);

    void runPairwiseAnalysis(DesignMap&);
};

/**************************************************************************************************/




#endif /* defined(__Mothur__lefsecommand__) */