1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
|
#include "SoloFeature.h"
#include "streamFuns.h"
#include "ErrorWarning.h"
#include "SoloFeatureTypes.h"
#include "serviceFuns.cpp"
void SoloFeature::loadRawMatrix()
{
//make directories
if (P.runModeIn.size()<3) {
string errOut = "Exiting because of fatal PARAMETER error: --runMode soloCellFiltering should contain paths to count matrix input directorry and output prefix.";
errOut += "\nSOLUTION: re-run with --runMode soloCellFiltering </path/to/raw/count/dir/> </path/to/output/prefix>\n";
exitWithError(errOut, std::cerr, P.inOut->logMain, EXIT_CODE_PARAMETER, P);
};
string inputPrefix= P.runModeIn[1] + '/';
outputPrefix= P.runModeIn[2];
outputPrefixFiltered= outputPrefix;
/////////////////////////////////////////////////////////////
//load counting matrix
string matrixFileName=inputPrefix+pSolo.outFileNames[3];
ifstream &matStream=ifstrOpen(matrixFileName, ERROR_OUT, "SOLUTION: check path and permission for the matrix file" + matrixFileName, P);
//header
while (matStream.peek() == '%') {
string dummy;
matStream.ignore(numeric_limits<streamsize>::max(), '\n');
};
uint32 nCB1; //number of features read from file
uint64 nTot; //total number of entries
matStream >> featuresNumber >> nCB1 >> nTot;
if (nTot==0) {
exitWithError("Exiting because of fatal INPUT FILE error: no counts detected in " + matrixFileName + \
"\nSOLUTION: check the formatting of the matrix file.\n", \
std::cerr, P.inOut->logMain, EXIT_CODE_PARAMETER, P);
};
/* do not need it - read the information from features file
if (nfeat1 != featuresNumber) {
ostringstream errOut;
errOut <<"Exiting because of fatal INPUT FILE error: number of features in "<< matrixFileName <<": "<< nfeat1;
errOut <<"\n is not equal to the number of features from annotations from genome index: " << featuresNumber << "\n";
errOut <<"SOLUTION: use the same genome index that was use in the STARsolo run that generated the matrix\n";
exitWithError(errOut.str(), std::cerr, P.inOut->logMain, EXIT_CODE_PARAMETER, P);
};
*/
//struct {uint32 gene; uint32 cb; uint32 count} matEntry;
/*vector<array<uint32,3>> matSparse(nTot);
for (uint64 ii=0; ii<nTot; ii++) {
for (uint32 jj=0; jj<3; jj++)
matStream >> matSparse[ii][jj];
};
//sort by cell index
std::sort(matSparse.begin(), matSparse.end(), [](const array<uint32,3> &m1, const array<uint32,3> &m2) {
return m1[1] < m2[1] ;
});
*/
countMatStride=3; //gene, cell, count. Recording cell at shift=1 is temporary: later will replace cell with count
countCellGeneUMI.resize(nTot*countMatStride,0);
for (uint64 ii=0; ii<nTot; ii++) {
matStream >> countCellGeneUMI[ii*countMatStride+0];//gene
matStream >> countCellGeneUMI[ii*countMatStride+1];//cell
matStream >> countCellGeneUMI[ii*countMatStride+2];//count: for now, only allow one value per cell/gene
--countCellGeneUMI[ii*countMatStride+0];
--countCellGeneUMI[ii*countMatStride+1];
};
qsort((void*) countCellGeneUMI.data(), nTot, countMatStride*sizeof(countCellGeneUMI[0]), funCompareTypeSecondFirst<uint32>);
//count number of detected cell
nCB=0;
uint32 ciprev=(uint32) -1;
for (uint32 ii=0; ii<nTot; ii++) {
uint32 ci1=countCellGeneUMI[ii*countMatStride+1];
if (ci1!=ciprev) {//new cell
ciprev=ci1;
nCB++;
};
};
indCB.resize(nCB);
countCellGeneUMIindex.resize(nCB);
nUMIperCB.resize(nCB,0);
nGenePerCB.resize(nCB,0);
nReadPerCB.resize(nCB,0);
nCB=(uint32) -1;
ciprev=(uint32) -1;
for (uint32 ii=0; ii<nTot; ii++) {
uint32 ci1 = countCellGeneUMI[ii*countMatStride+1];
if (ci1 != ciprev) {//new cell
ciprev = ci1;
nCB++;
indCB[nCB] = ci1;
countCellGeneUMIindex[nCB] = ii*countMatStride;
};
nGenePerCB[nCB]++;
nUMIperCB[nCB] += countCellGeneUMI[ii*countMatStride+2];
countCellGeneUMI[ii*countMatStride+1]=countCellGeneUMI[ii*countMatStride+2];//replace cell with count to keep standard convention about countCellGeneUMI
};
{//load barcodes
ifstream &wlstream = ifstrOpen(inputPrefix+pSolo.outFileNames[2], ERROR_OUT, "SOLUTION: check the path and permissions of the barcodes file", P);
pSolo.cbWLstr.resize(nCB1);
for (auto &cb: pSolo.cbWLstr)
std::getline(wlstream, cb);
};
{//copy features
std::ifstream &infeat = ifstrOpen(inputPrefix + pSolo.outFileNames[1], ERROR_OUT, "SOLUTION: check the path and permissions of the features file", P);
createDirectory(outputPrefixFiltered, P.runDirPerm, "Solo output directory", P);
std::ofstream &outfeat = ofstrOpen(outputPrefixFiltered + pSolo.outFileNames[1], ERROR_OUT, P);
outfeat << infeat.rdbuf();
outfeat.close();
};
return;
};
|