File: sjdbInsertJunctions.cpp

package info (click to toggle)
rna-star 2.7.11b%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,492 kB
  • sloc: cpp: 21,951; awk: 827; ansic: 457; makefile: 192; sh: 31
file content (102 lines) | stat: -rw-r--r-- 6,174 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#include "sjdbInsertJunctions.h"
#include "sjdbLoadFromStream.h"
#include "sjdbLoadFromFiles.h"
#include "sjdbPrepare.h"
#include "ErrorWarning.h"
#include "GTF.h"
#include "sjdbBuildIndex.h"
#include "streamFuns.h"
#include "genomeParametersWrite.h"

void sjdbInsertJunctions(Parameters & P, Genome & mapGen, Genome & mapGen1, SjdbClass & sjdbLoci)
{
    time_t rawtime;

    if (mapGen.sjdbN>0 && sjdbLoci.chr.size()==0) {
        //load from the saved genome, only if the loading did not happen already (if sjdb insertion happens at the 1st pass, sjdbLoci will be populated
        ifstream & sjdbStreamIn = ifstrOpen(P.pGe.gDir+"/sjdbList.out.tab", ERROR_OUT, "SOLUTION: re-generate the genome in pGe.gDir=" + P.pGe.gDir, P);
        sjdbLoadFromStream(sjdbStreamIn, sjdbLoci);
        sjdbLoci.priority.resize(sjdbLoci.chr.size(),30);
        time ( &rawtime );
        P.inOut->logMain << timeMonthDayTime(rawtime) << "   Loaded database junctions from the generated genome " << P.pGe.gDir+"/sjdbList.out.tab" <<": "<<sjdbLoci.chr.size()<<" total junctions\n\n";
    };

    if (P.twoPass.pass2) {
        //load 1st pass new junctions
        //sjdbLoci already contains the junctions from before 1st pass
        ifstream sjdbStreamIn ( P.twoPass.pass1sjFile.c_str() );
        if (sjdbStreamIn.fail()) {
            ostringstream errOut;
            errOut << "FATAL INPUT error, could not open input file with junctions from the 1st pass=" << P.twoPass.pass1sjFile <<"\n";
            exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
        };
        sjdbLoadFromStream(sjdbStreamIn, sjdbLoci);
        sjdbLoci.priority.resize(sjdbLoci.chr.size(),0);
        time ( &rawtime );
        P.inOut->logMain << timeMonthDayTime(rawtime) << "   Loaded database junctions from the 1st pass file: " << P.twoPass.pass1sjFile <<": "<<sjdbLoci.chr.size()<<" total junctions\n\n";
    } else if (P.runMode!="genomeGenerate") {
        //loading junctions from GTF or tab or from the saved genome is only allowed at the 1st pass
        //at the 2nd pass these are already in the sjdbLoci
        //with runMode=="genomeGenerate", the junctions from GTF and File are already loaded
        sjdbLoadFromFiles(P, sjdbLoci);
        GTF gtf(mapGen, P, P.sjdbInsert.outDir, sjdbLoci);
        gtf.transcriptGeneSJ(P.sjdbInsert.outDir);
    };

    //char *Gsj=new char [2*mapGen.sjdbLength*sjdbLoci.chr.size()*(P.var.yes ? 2:1)+1];//array to store junction sequences, will be filled in sjdbPrepare
    char *Gsj=new char [2*mapGen.sjdbLength*sjdbLoci.chr.size()+1];//array to store junction sequences, will be filled in sjdbPrepare

    sjdbPrepare (sjdbLoci, P, mapGen.chrStart[mapGen.nChrReal], P.sjdbInsert.outDir, mapGen, Gsj);//mapGen.nGenome - change when replacing junctions
    time ( &rawtime );
    P.inOut->logMain  << timeMonthDayTime(rawtime) << "   Finished preparing junctions" <<endl;

    if (mapGen.sjdbN>P.limitSjdbInsertNsj)
    {
        ostringstream errOut;
        errOut << "Fatal LIMIT error: the number of junctions to be inserted on the fly ="<<mapGen.sjdbN<<" is larger than the limitSjdbInsertNsj="<<P.limitSjdbInsertNsj<<"\n";                errOut << "Fatal LIMIT error: the number of junctions to be inserted on the fly ="<<mapGen.sjdbN<<" is larger than the limitSjdbInsertNsj="<<P.limitSjdbInsertNsj<<"\n";
        errOut << "SOLUTION: re-run with at least --limitSjdbInsertNsj "<<mapGen.sjdbN<<"\n";
        exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
    };
    //insert junctions into the genome and SA and SAi
    sjdbBuildIndex (P, Gsj, mapGen.G, mapGen.SA, (P.twoPass.pass2 ? mapGen.SApass2 : mapGen.SApass1), mapGen.SAi, mapGen, mapGen1);
    delete [] Gsj; //junction sequences have been added to G
    time ( &rawtime );
    P.inOut->logMain     << timeMonthDayTime(rawtime) << " ..... finished inserting junctions into genome" <<endl;

    //write an extra 0 at the end of the array, filling the last bytes that otherwise are not accessible, but will be written to disk
    //this is - to avoid valgrind complaints. Note that SApass1 is allocated with plenty of space to spare.
    mapGen.SA.writePacked(mapGen.nSA,0);

    if (P.pGe.sjdbInsertSave=="All")
    {//save and copy all genome files into sjdbInsert.outDir, except those created above
        if (P.pGe.gDir != P.sjdbInsert.outDir)
        {
            copyFile(P.pGe.gDir+"/chrName.txt", P.sjdbInsert.outDir+"/chrName.txt");
            copyFile(P.pGe.gDir+"/chrStart.txt", P.sjdbInsert.outDir+"/chrStart.txt");
            copyFile(P.pGe.gDir+"/chrNameLength.txt", P.sjdbInsert.outDir+"/chrNameLength.txt");
            copyFile(P.pGe.gDir+"/chrLength.txt", P.sjdbInsert.outDir+"/chrLength.txt");
        };

        mapGen.pGe.gFileSizes.clear();
        mapGen.pGe.gFileSizes.push_back(mapGen.nGenome);
        mapGen.pGe.gFileSizes.push_back(mapGen.SA.lengthByte);
        genomeParametersWrite(P.sjdbInsert.outDir+("/genomeParameters.txt"), P, ERROR_OUT, mapGen);

        ofstream & genomeOut = ofstrOpen(P.sjdbInsert.outDir+"/Genome",ERROR_OUT, P);
        fstreamWriteBig(genomeOut,mapGen.G,mapGen.nGenome,P.sjdbInsert.outDir+"/Genome",ERROR_OUT,P);
        genomeOut.close();

        ofstream & saOut = ofstrOpen(P.sjdbInsert.outDir+"/SA",ERROR_OUT, P);
        fstreamWriteBig(saOut,(char*) mapGen.SA.charArray, (streamsize) mapGen.SA.lengthByte, P.sjdbInsert.outDir+"/SA",ERROR_OUT,P);
        saOut.close();

        ofstream & saIndexOut = ofstrOpen(P.sjdbInsert.outDir+"/SAindex",ERROR_OUT, P);
        fstreamWriteBig(saIndexOut, (char*) &P.pGe.gSAindexNbases, sizeof(P.pGe.gSAindexNbases),P.sjdbInsert.outDir+"/SAindex",ERROR_OUT,P);
        fstreamWriteBig(saIndexOut, (char*) mapGen.genomeSAindexStart, sizeof(mapGen.genomeSAindexStart[0])*(P.pGe.gSAindexNbases+1),P.sjdbInsert.outDir+"/SAindex",ERROR_OUT,P);
        fstreamWriteBig(saIndexOut,  mapGen.SAi.charArray, mapGen.SAi.lengthByte,P.sjdbInsert.outDir+"/SAindex",ERROR_OUT,P);
        saIndexOut.close();
    };

    //re-calculate genome-related parameters
    P.winBinN = mapGen.nGenome/(1LLU << P.winBinNbits)+1;
};