1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
|
#ifndef COLLAPSED_CELL_OPTIMIZER_HPP
#define COLLAPSED_CELL_OPTIMIZER_HPP
#include <unordered_map>
#include <unordered_set>
#include <functional>
#include <thread>
#include <boost/filesystem.hpp>
#include "ReadExperiment.hpp"
#include "SalmonOpts.hpp"
#include "GZipWriter.hpp"
#include "EquivalenceClassBuilder.hpp"
#include "AlevinOpts.hpp"
#include "SingleCellProtocols.hpp"
#include "WhiteList.hpp"
#include "DedupUMI.hpp"
#include "TranscriptGroup.hpp"
#include "cuckoohash_map.hh"
#include "Eigen/Dense"
#include "concurrentqueue.h"
#include <boost/math/special_functions/digamma.hpp>
#include <boost/random.hpp>
namespace bfs = boost::filesystem;
using JqueueT = moodycamel::ConcurrentQueue<uint32_t>;
using eqMapT = libcuckoo::cuckoohash_map<TranscriptGroup, SCTGValue, TranscriptGroupHasher>;
using tgrouplabelt = std::vector<uint32_t>;
using tgroupweightvec = std::vector<double>;
using SCExpT = ReadExperiment<EquivalenceClassBuilder<SCTGValue>>;
using EqMapT = libcuckoo::cuckoohash_map<TranscriptGroup, SCTGValue, TranscriptGroupHasher>;
constexpr double digammaMin = 1e-10;
struct CellState {
bool inActive;
};
class CollapsedCellOptimizer {
public:
using VecType = std::vector<std::atomic<double>>;
using SerialVecType = std::vector<double>;
CollapsedCellOptimizer();
template <class ProtocolT>
bool optimize(EqMapT& freqMap,
spp::sparse_hash_map<uint32_t, uint32_t>& txpToGeneMap,
spp::sparse_hash_map<std::string, uint32_t>& geneIdxMap,
AlevinOpts<ProtocolT>& aopt,
GZipWriter& gzw,
std::vector<std::string>& trueBarcodes,
std::vector<uint32_t>& umiCount,
CFreqMapT& freqCounter,
size_t numLowConfidentBarcode);
};
bool runPerCellEM(double& totalNumFrags, size_t numGenes,
CollapsedCellOptimizer::SerialVecType& alphas,
const CollapsedCellOptimizer::SerialVecType& priorAlphas,
std::vector<SalmonEqClass>& salmonEqclasses,
std::shared_ptr<spdlog::logger>& jointlog,
bool initUniform, bool useVBEM);
void optimizeCell(std::vector<std::string>& trueBarcodes,
const std::vector<std::vector<double>>& priorAlphas,
std::atomic<uint32_t>& barcode,
size_t totalCells, uint32_t umiEditDistance, eqMapT& eqMap,
std::deque<std::pair<TranscriptGroup, uint32_t>>& orderedTgroup,
std::shared_ptr<spdlog::logger>& jointlog,
std::vector<uint32_t>& umiCount,
std::vector<CellState>& skippedCB,
bool verbose, GZipWriter& gzw, bool noEM, bool useVBEM,
bool quiet, std::atomic<double>& totalDedupCounts,
std::atomic<uint32_t>& totalExpGeneCounts, double priorWeight,
spp::sparse_hash_map<uint32_t, uint32_t>& txpToGeneMap,
uint32_t numGenes, uint32_t umiLength,
uint32_t numBootstraps, uint32_t numGibbsSamples,
bool naiveEqclass, bool dumpUmiGraph,
bool dumpCellEq, bool useAllBootstraps,
bool initUniform, CFreqMapT& freqCounter, bool dumpArboFragCounts,
spp::sparse_hash_set<uint32_t>& mRnaGenes,
spp::sparse_hash_set<uint32_t>& rRnaGenes,
std::atomic<uint64_t>& totalUniEdgesCounts,
std::atomic<uint64_t>& totalBiEdgesCounts);
using VecT = CollapsedCellOptimizer::SerialVecType;
#endif // COLLAPSED_CELL_OPTIMIZER_HPP
|