1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
|
#include "sjdbInsertJunctions.h"
#include "sjdbLoadFromStream.h"
#include "sjdbLoadFromFiles.h"
#include "sjdbPrepare.h"
#include "ErrorWarning.h"
#include "GTF.h"
#include "sjdbBuildIndex.h"
#include "streamFuns.h"
#include "genomeParametersWrite.h"
void sjdbInsertJunctions(Parameters & P, Genome & mapGen, Genome & mapGen1, SjdbClass & sjdbLoci)
{
time_t rawtime;
if (mapGen.sjdbN>0 && sjdbLoci.chr.size()==0) {
//load from the saved genome, only if the loading did not happen already (if sjdb insertion happens at the 1st pass, sjdbLoci will be populated
ifstream & sjdbStreamIn = ifstrOpen(P.pGe.gDir+"/sjdbList.out.tab", ERROR_OUT, "SOLUTION: re-generate the genome in pGe.gDir=" + P.pGe.gDir, P);
sjdbLoadFromStream(sjdbStreamIn, sjdbLoci);
sjdbLoci.priority.resize(sjdbLoci.chr.size(),30);
time ( &rawtime );
P.inOut->logMain << timeMonthDayTime(rawtime) << " Loaded database junctions from the generated genome " << P.pGe.gDir+"/sjdbList.out.tab" <<": "<<sjdbLoci.chr.size()<<" total junctions\n\n";
};
if (P.twoPass.pass2) {
//load 1st pass new junctions
//sjdbLoci already contains the junctions from before 1st pass
ifstream sjdbStreamIn ( P.twoPass.pass1sjFile.c_str() );
if (sjdbStreamIn.fail()) {
ostringstream errOut;
errOut << "FATAL INPUT error, could not open input file with junctions from the 1st pass=" << P.twoPass.pass1sjFile <<"\n";
exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
};
sjdbLoadFromStream(sjdbStreamIn, sjdbLoci);
sjdbLoci.priority.resize(sjdbLoci.chr.size(),0);
time ( &rawtime );
P.inOut->logMain << timeMonthDayTime(rawtime) << " Loaded database junctions from the 1st pass file: " << P.twoPass.pass1sjFile <<": "<<sjdbLoci.chr.size()<<" total junctions\n\n";
} else if (P.runMode!="genomeGenerate") {
//loading junctions from GTF or tab or from the saved genome is only allowed at the 1st pass
//at the 2nd pass these are already in the sjdbLoci
//with runMode=="genomeGenerate", the junctions from GTF and File are already loaded
sjdbLoadFromFiles(P, sjdbLoci);
GTF gtf(mapGen, P, P.sjdbInsert.outDir, sjdbLoci);
gtf.transcriptGeneSJ(P.sjdbInsert.outDir);
};
//char *Gsj=new char [2*mapGen.sjdbLength*sjdbLoci.chr.size()*(P.var.yes ? 2:1)+1];//array to store junction sequences, will be filled in sjdbPrepare
char *Gsj=new char [2*mapGen.sjdbLength*sjdbLoci.chr.size()+1];//array to store junction sequences, will be filled in sjdbPrepare
sjdbPrepare (sjdbLoci, P, mapGen.chrStart[mapGen.nChrReal], P.sjdbInsert.outDir, mapGen, Gsj);//mapGen.nGenome - change when replacing junctions
time ( &rawtime );
P.inOut->logMain << timeMonthDayTime(rawtime) << " Finished preparing junctions" <<endl;
if (mapGen.sjdbN>P.limitSjdbInsertNsj)
{
ostringstream errOut;
errOut << "Fatal LIMIT error: the number of junctions to be inserted on the fly ="<<mapGen.sjdbN<<" is larger than the limitSjdbInsertNsj="<<P.limitSjdbInsertNsj<<"\n"; errOut << "Fatal LIMIT error: the number of junctions to be inserted on the fly ="<<mapGen.sjdbN<<" is larger than the limitSjdbInsertNsj="<<P.limitSjdbInsertNsj<<"\n";
errOut << "SOLUTION: re-run with at least --limitSjdbInsertNsj "<<mapGen.sjdbN<<"\n";
exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
};
//insert junctions into the genome and SA and SAi
sjdbBuildIndex (P, Gsj, mapGen.G, mapGen.SA, (P.twoPass.pass2 ? mapGen.SApass2 : mapGen.SApass1), mapGen.SAi, mapGen, mapGen1);
delete [] Gsj; //junction sequences have been added to G
time ( &rawtime );
P.inOut->logMain << timeMonthDayTime(rawtime) << " ..... finished inserting junctions into genome" <<endl;
//write an extra 0 at the end of the array, filling the last bytes that otherwise are not accessible, but will be written to disk
//this is - to avoid valgrind complaints. Note that SApass1 is allocated with plenty of space to spare.
mapGen.SA.writePacked(mapGen.nSA,0);
if (P.pGe.sjdbInsertSave=="All")
{//save and copy all genome files into sjdbInsert.outDir, except those created above
if (P.pGe.gDir != P.sjdbInsert.outDir)
{
copyFile(P.pGe.gDir+"/chrName.txt", P.sjdbInsert.outDir+"/chrName.txt");
copyFile(P.pGe.gDir+"/chrStart.txt", P.sjdbInsert.outDir+"/chrStart.txt");
copyFile(P.pGe.gDir+"/chrNameLength.txt", P.sjdbInsert.outDir+"/chrNameLength.txt");
copyFile(P.pGe.gDir+"/chrLength.txt", P.sjdbInsert.outDir+"/chrLength.txt");
};
mapGen.pGe.gFileSizes.clear();
mapGen.pGe.gFileSizes.push_back(mapGen.nGenome);
mapGen.pGe.gFileSizes.push_back(mapGen.SA.lengthByte);
genomeParametersWrite(P.sjdbInsert.outDir+("/genomeParameters.txt"), P, ERROR_OUT, mapGen);
ofstream & genomeOut = ofstrOpen(P.sjdbInsert.outDir+"/Genome",ERROR_OUT, P);
fstreamWriteBig(genomeOut,mapGen.G,mapGen.nGenome,P.sjdbInsert.outDir+"/Genome",ERROR_OUT,P);
genomeOut.close();
ofstream & saOut = ofstrOpen(P.sjdbInsert.outDir+"/SA",ERROR_OUT, P);
fstreamWriteBig(saOut,(char*) mapGen.SA.charArray, (streamsize) mapGen.SA.lengthByte, P.sjdbInsert.outDir+"/SA",ERROR_OUT,P);
saOut.close();
ofstream & saIndexOut = ofstrOpen(P.sjdbInsert.outDir+"/SAindex",ERROR_OUT, P);
fstreamWriteBig(saIndexOut, (char*) &P.pGe.gSAindexNbases, sizeof(P.pGe.gSAindexNbases),P.sjdbInsert.outDir+"/SAindex",ERROR_OUT,P);
fstreamWriteBig(saIndexOut, (char*) mapGen.genomeSAindexStart, sizeof(mapGen.genomeSAindexStart[0])*(P.pGe.gSAindexNbases+1),P.sjdbInsert.outDir+"/SAindex",ERROR_OUT,P);
fstreamWriteBig(saIndexOut, mapGen.SAi.charArray, mapGen.SAi.lengthByte,P.sjdbInsert.outDir+"/SAindex",ERROR_OUT,P);
saIndexOut.close();
};
//re-calculate genome-related parameters
P.winBinN = mapGen.nGenome/(1LLU << P.winBinNbits)+1;
};
|