// subsample.h -- Mothur subsampling interface (SubSample class declaration)
#ifndef Mothur_subsample_h
#define Mothur_subsample_h
//
// subsample.h
// Mothur
//
// Created by Sarah Westcott on 4/2/12.
// Copyright (c) 2012 Schloss Lab. All rights reserved.
//
#include "mothurout.h"
#include "rabundvector.hpp"
#include "ordervector.hpp"
#include "treemap.h"
#include "tree.h"
#include "counttable.h"
#include "sharedrabundvectors.hpp"
// Subsampling overwrites the sharedRabunds passed in. No getSamplePreserve function is declared in this class; if you need to reuse the original, deep copy it before calling.
class SubSample {
public:
SubSample() { m = MothurOut::getInstance(); }
~SubSample() = default;
vector<string> getSample(SharedRAbundVectors*&, int); //returns the bin labels for the subsample. Overwrites original vector passed in, if you need to preserve it deep copy first.
vector<string> getSampleWithReplacement(SharedRAbundVectors*&, int); //returns the bin labels for the subsample. Overwrites original vector passed in, if you need to preserve it deep copy first.
vector<string> getSample(vector<SharedRAbundVector*>&, int, vector<string>); //returns the bin labels for the subsample. Overwrites original vector passed in, if you need to preserve it deep copy first.
vector<string> getSampleWithReplacement(vector<SharedRAbundVector*>&, int, vector<string>); //returns the bin labels for the subsample. Overwrites original vector passed in, if you need to preserve it deep copy first.
Tree* getSample(Tree*, CountTable*, CountTable*, int, vector<string>&); //creates new subsampled tree. Uses first counttable to fill new counttable with sabsampled seqs. Sets groups of seqs not in subsample to "doNotIncludeMe".
Tree* getSampleWithReplacement(Tree*, CountTable*, CountTable*, int, vector<string>&); //creates new subsampled tree. Uses first counttable to fill new counttable with sabsampled seqs. Sets groups of seqs not in subsample to "doNotIncludeMe".
int getSample(SAbundVector*&, int); //destroys sabundvector passed in, so copy it if you need it
int getSampleWithReplacement(SAbundVector*&, int); //destroys sabundvector passed in, so copy it if you need it
int getSample(RAbundVector*&, int); //destroys sabundvector passed in, so copy it if you need it
int getSampleWithReplacement(RAbundVector*&, int); //destroys sabundvector passed in, so copy it if you need it
CountTable getSample(CountTable&, int, vector<string>, bool persample); // if persample then subsample 'size' members from each group - bygroup(same number sampled from each group), returns subsampled. If not persample then subsample 'size' members from the set of groups passed in.
CountTable getSampleWithReplacement(CountTable&, int, vector<string>, bool persample); // if persample then subsample 'size' members from each group - bygroup(same number sampled from each group), returns subsampled. If not persample then subsample 'size' members from the set of groups passed in.
GroupMap getSample(GroupMap&, int, vector<string> groupsWanted, bool persample); // if persample then subsample 'size' members from each group - bygroup(same number sampled from each group), returns subsampled. If not persample then subsample 'size' members from the set of groups passed in.
GroupMap getSample(GroupMap&, int size); //returns subsampled GroupMap with 'size' members
set<long long> getWeightedSample(map<long long, long long>&, long long); //map of sequence names -> weight (could be abundance or some other measure), num to sample
private:
MothurOut* m;
Utils util;
map<string, string> deconvolute(map<string, string> wholeSet, vector<string>& subsampleWanted); //returns new nameMap containing only subsampled names, and removes redundants from subsampled wanted because it makes the new nameMap.
GroupMap getSample(GroupMap&, int, vector<string> groupsWanted);
CountTable getSample(CountTable&, int, vector<string>); //subsample a countTable bygroup(same number sampled from each group), returns subsampled countTable
CountTable getSampleWithReplacement(CountTable&, int, vector<string>); //subsample a countTable bygroup(same number sampled from each group), returns subsampled countTable
};
#endif
// end of include guard: Mothur_subsample_h