File: ReadInBitsSet.h

package info (click to toggle)
perm 0.4.0-8
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 976 kB
  • sloc: cpp: 13,499; makefile: 98; sh: 12
file content (77 lines) | stat: -rw-r--r-- 2,846 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#pragma once
#ifndef READINBITSET_H_
#define READINBITSET_H_
#include "ReadsFileParser.h"
#include "ReadInBits.h"
#include "ColorSpaceRead.h"
#include "ShortReadUtil.h"
#include "MismatchScores.h"
#include "ReadsQualScores.h"
#include "Filename.h"
#include "stdafx.h"
#include <vector>
#include <iostream>
#include <string>
using namespace std;
// Number of reads buffered per batch: reads are taken in one million at a time.
// NOTE(review): the consumers of this constant are not visible in this header.
const unsigned int BUFFERED_READS_SIZE = 1000000;
// Smaller alternative value, left disabled by the original author:
// const unsigned int BUFFERED_READS_SIZE = 1000;

// Holds a set of reads (a vector of CReadInBits) together with optional
// per-read IDs, quality scores and mismatch scores, plus a CReadsFileParser
// used for reading input.
// NOTE(review): only declarations are visible in this header; comments on
// individual members describe what the signatures and the original notes
// state, and anything beyond that is marked as an assumption to confirm
// against the definitions.
class CReadInBitsSet
{
public:
    CReadInBitsSet(void);
    // Construct from a capacity and a read length; allowedNumOfNinRead defaults to 0.
    // NOTE(review): presumably Capacity pre-sizes the read vector - confirm in the definition.
    CReadInBitsSet(unsigned int Capacity, unsigned int uiReadLength, unsigned int allowedNumOfNinRead = 0);
    // Generate the set from a file of reads; the read length must be known in advance.
    CReadInBitsSet(const char* InputFile, const char* fileFormat, \
                   unsigned int uiReadStartIndex, unsigned int uiReadLength, unsigned int allowedNumOfNinRead);
    virtual ~CReadInBitsSet();
    // NOTE(review): presumably empties the set and optionally reserves `capacity`
    // entries; the meaning of the int return value is not visible here.
    int clear(int capacity = 0);

    // Original note: get reads from the file and store (append) them in a vector;
    // return how many reads were read in.
    // NOTE(review): the note sits above the open* functions but reads as if it
    // describes get_next_capacity_reads - confirm which function(s) it applies to.
    unsigned int openAFileReady2GetRead(const char* InputFile, const char* fileFormat, unsigned int uiReadStartIndex);
    unsigned int openAFileReady2GetReadQSinQUAL(const char* InputFile, unsigned int readQsLength);
    unsigned int get_next_capacity_reads(int capacity, char sep = ',');
    void ignoreQScores(void);

    // NOTE(review): presumably writes the ID of read number `no` into the
    // caller-supplied buffer readId; required buffer size is not visible here.
    void get_read_id(int no, char* readId);
    // sep defaults to ','. NOTE(review): presumably the separator used when
    // extracting the ID from tagLine - confirm in the definition.
    void save_next_read_id(const char* tagLine, char sep = ',');
    bool save_next_read(const char* readSeq, bool bSOLiDReadFormat);
    // Defined inline later in this header: returns NULL when pQualScores is NULL,
    // otherwise the result of pQualScores->qScores(readId).
    inline const char* getQScoresPtr(int readId);

    // NOTE(review): raw pointers; who allocates and frees them is not visible in
    // this header - confirm against the constructors and destructor.
    vector<CReadInBits>* pReadsSet;
    vector<CReadID>* pReadsID; // The vector keeps the tags (IDs) of the reads
    bool bDiscardReadWithN;
    unsigned int allowedNumOfNinRead;
    unsigned int  uiRead_Length;
    unsigned int  uiNo_of_Reads;
    unsigned int  uiNo_of_Bad_Reads;
    // A char flag recording the input file type, e.g. F for fasta, S for .seq.txt, A for .realign
    char cFileType;
    // Fixed-size buffer of FILENAME_MAX chars. NOTE(review): presumably the path
    // of the input reads file - confirm where it is filled in.
    char InputFile[FILENAME_MAX];
    // Keeps the quality-score information (may be NULL; see getQScoresPtr)
    CReadsQualScores* pQualScores;
    // Keeps the best alignment score and the number of best alignments
    CMismatchScores* pMismatchScores;
    // NOTE(review): presumably sets the output buffer that receives bad reads;
    // ownership of pOut is not visible here.
    void setBadReadOutputFile(FileOutputBuffer* pOut);
protected:
    // Parser object for the reads input file.
    CReadsFileParser parser;
private:
    // NOTE(review): presumably shared set-up called by the constructors; the
    // meaning of the int return value is not visible here.
    int initialization(void);
    // NOTE(review): presumably invoked when a read is rejected - confirm in the definition.
    void handleBadread(void);
};

inline const char* CReadInBitsSet::getQScoresPtr(int readId)
{
    if (this->pQualScores == NULL) {
        return(NULL);
    } else {
        return(this->pQualScores->qScores((unsigned int)readId));
    }
}

// Prints out the reads in readSet whose mapping is worse than the threshold
// missMatchScoreT.
// NOTE(review): presumably the output goes to the file named by outputfile;
// the meaning of the int return value is not visible in this header.
int printMissReads(const char* outputfile, CReadInBitsSet& readSet, int missMatchScoreT);
#endif /* READINBITSET_H_ */