1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
|
#pragma once
#ifndef READINBITSET_H_
#define READINBITSET_H_
#include "ReadsFileParser.h"
#include "ReadInBits.h"
#include "ColorSpaceRead.h"
#include "ShortReadUtil.h"
#include "MismatchScores.h"
#include "ReadsQualScores.h"
#include "Filename.h"
#include "stdafx.h"
#include <vector>
#include <iostream>
#include <string>
using namespace std;
// Number of reads buffered per pass when reading the input: one million.
// Alternative smaller buffer size kept for reference:
// const unsigned int BUFFERED_READS_SIZE = 1000;
const unsigned int BUFFERED_READS_SIZE = 1000000U;
// A set of reads stored as CReadInBits objects, together with their IDs
// (CReadID) and optional quality-score and mismatch-score tables.
// Input is handled through the CReadsFileParser member `parser`.
// NOTE(review): the pointer members below are raw pointers; allocation and
// release happen in the implementation file, which is not visible here.
class CReadInBitsSet
{
public:
CReadInBitsSet(void);
// Construct from a capacity and a read length. allowedNumOfNinRead defaults
// to 0 (presumably the number of 'N' characters tolerated in a read - confirm).
CReadInBitsSet(unsigned int Capacity, unsigned int uiReadLength, unsigned int allowedNumOfNinRead = 0);
// Generate the set from a file of reads; the read length must be known in advance.
CReadInBitsSet(const char* InputFile, const char* fileFormat, \
unsigned int uiReadStartIndex, unsigned int uiReadLength, unsigned int allowedNumOfNinRead);
virtual ~CReadInBitsSet();
// NOTE(review): presumably empties the set; the role of `capacity` (default 0)
// and the meaning of the int return value are not visible here.
int clear(int capacity = 0);
// Get reads from the file and store (append) them in a vector. Returns how many reads were read in.
unsigned int openAFileReady2GetRead(const char* InputFile, const char* fileFormat, unsigned int uiReadStartIndex);
// NOTE(review): by its name, opens a .QUAL file holding quality scores of
// length readQsLength - confirm against the implementation.
unsigned int openAFileReady2GetReadQSinQUAL(const char* InputFile, unsigned int readQsLength);
// NOTE(review): presumably reads up to `capacity` further reads; `sep`
// (default ',') is presumably a separator used when parsing tag lines - confirm.
unsigned int get_next_capacity_reads(int capacity, char sep = ',');
void ignoreQScores(void);
// Writes the ID of read number `no` into the caller-supplied buffer readId.
// NOTE(review): required buffer size is not visible here - confirm.
void get_read_id(int no, char* readId);
void save_next_read_id(const char* tagLine, char sep = ',');
bool save_next_read(const char* readSeq, bool bSOLiDReadFormat);
// Returns the quality-score pointer for a read, or NULL when pQualScores is NULL
// (defined inline below the class).
inline const char* getQScoresPtr(int readId);
// The reads themselves.
vector<CReadInBits>* pReadsSet;
vector<CReadID>* pReadsID; // The vector keeps the tags of the reads
bool bDiscardReadWithN;
unsigned int allowedNumOfNinRead;
unsigned int uiRead_Length;
unsigned int uiNo_of_Reads;
unsigned int uiNo_of_Bad_Reads;
// A char flag recording the input file type, e.g. F for fasta, S for .seq.txt, A for .realign
char cFileType;
// Name of the input file (fixed-size buffer of FILENAME_MAX chars).
char InputFile[FILENAME_MAX];
// Keeps the quality scores of the reads; may be NULL (see getQScoresPtr)
CReadsQualScores* pQualScores;
// Keeps the best alignment score and the number of best alignments
CMismatchScores* pMismatchScores;
// NOTE(review): presumably sets the output buffer that bad reads are written to - confirm.
void setBadReadOutputFile(FileOutputBuffer* pOut);
protected:
// Parser used to read the input file.
CReadsFileParser parser;
private:
int initialization(void);
void handleBadread(void);
};
inline const char* CReadInBitsSet::getQScoresPtr(int readId)
{
if (this->pQualScores == NULL) {
return(NULL);
} else {
return(this->pQualScores->qScores((unsigned int)readId));
}
}
// Prints out the reads of readSet whose mapping is worse than the threshold missMatchScoreT.
// NOTE(review): output presumably goes to the file named by `outputfile`; the exact
// comparison against the threshold and the meaning of the int return value are
// defined in the implementation - confirm there.
int printMissReads(const char* outputfile, CReadInBitsSet& readSet, int missMatchScoreT);
#endif /* READINBITSET_H_ */
|