File: ReadsFileParser.h

package info (click to toggle)
perm 0.4.0-8
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 976 kB
  • sloc: cpp: 13,499; makefile: 98; sh: 12
file content (103 lines) | stat: -rw-r--r-- 3,463 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#pragma once
#ifndef READS_FILE_PARSER_H_
#define READS_FILE_PARSER_H_

// Size of file-path buffers (used below for CReadsFileParser::InputFile).
// Defined only when no MAX_PATH macro is already visible at this point.
#ifndef MAX_PATH
#define MAX_PATH 2048
#endif

// Size of the fixed per-line char buffers declared in CReadsFileParser.
// NOTE(review): MAX_CHAR_PER_LINE is a const int, not a macro, so this #ifndef
// only skips the definition when a *macro* of that name has been #defined; it
// does not detect an earlier `const int MAX_CHAR_PER_LINE` definition.
#ifndef MAX_CHAR_PER_LINE
const int MAX_CHAR_PER_LINE = 5000;
#endif

// NOTE(review): not used in this header; by its name this is presumably the
// size of the buffer used when reading reads files - confirm at the use site.
const int READS_INPUT_BUFFER_SIZE = 20000000;

// NOTE(review): same macro-vs-const guard mismatch as MAX_CHAR_PER_LINE above.
// Not used in this header; presumably an upper bound on the number of reads
// held in one read set - confirm at the use site.
#ifndef MAX_READ_SET_CAPACITY
const int MAX_READ_SET_CAPACITY = 30000000;
#endif

#include "ReadsQualScores.h"
#include "Filename.h"
#include "FileInputBuffer.h"
#include "FileOutputBuffer.h"
#include "ShortReadUtil.h"
#include "chdir.h"
#include <iostream>
#include <string>
using namespace std;

/*
* CReadsFileParser declares an interface for fetching reads one at a time
* from a reads file. Format-specific readers are declared for the fasta,
* csfasta, fastq and csfastq formats (see the protected section).
* Only declarations appear here; every member function is defined elsewhere.
* All data members are public.
*/
class CReadsFileParser
{
public:
    CReadsFileParser(void);
    virtual ~CReadsFileParser(void);
    // NOTE(review): judging by the name, this opens `filename` and prepares the
    // parser so that get_Next_Read() can be called. The parameter names match the
    // readStartIndex / uiRead_Length members below, so they are presumably stored
    // there. The meaning of the returned char is not visible in this header
    // (possibly the file-type symbol kept in cFileType) - confirm against the
    // implementation. pBadReadBuf is optional and defaults to NULL.
    char openAFileReady2GetRead(const char* filename, const char* fileFormat,\
                                unsigned int readStartIndex, unsigned int uiRead_Length,
                                bool bDiscardReadsWN, FileOutputBuffer* pBadReadBuf = NULL);
    // Fetch the next short read from the opened file, whatever its format.
    // NOTE(review): the returned char* presumably points into caNextRead, and
    // the end-of-input return value is not visible here - confirm.
    virtual char* get_Next_Read(void);
    virtual void print_Next_Read(void);
    // Input stream and buffer objects used by the parser.
    // NOTE(review): ownership and lifetime of pBuf / pOBuf are not visible in
    // this header - check the constructor, destructor and open function.
    ifstream ifile;
    FileInputBuffer* pBuf;
    FileOutputBuffer* pOBuf;
    // Fixed-size path buffer (MAX_PATH chars).
    char InputFile[MAX_PATH];
    // Fixed-size buffers of MAX_CHAR_PER_LINE chars each.
    // NOTE(review): by their names they hold the tag line, the sequence and the
    // quality scores of the next read - confirm.
    char caNextReadTag[MAX_CHAR_PER_LINE];
    char caNextRead[MAX_CHAR_PER_LINE];
    char caNextReadQSs[MAX_CHAR_PER_LINE];
    // NOTE(review): presumably a one-character code for the file format
    // (compare getReadsFileFormatSymbol, declared later in this header) - confirm.
    char cFileType;
    // NOTE(review): presumably mirrors the bDiscardReadsWN argument of
    // openAFileReady2GetRead (note the slightly different spelling) - confirm.
    bool bDiscardReadWN;
    unsigned int readStartIndex;
    unsigned int uiRead_Length;
protected:
    // get the universal read Id and store in vector
    // inline void save_next_read_id(const char* tagLine);
    // get a short read from fasta format
    char* get_Next_Read_From_Fasta(void);
    // get a short read from csfasta format
    char* get_Next_Read_From_csFasta(void);
    // get a short read from fastq format for Illumina read (seq only)
    char* get_Next_Read_From_Fastq(void);
    // get a short read from fastq format for SOLid read (seq only)
    char* get_Next_Read_From_csFastq(void);
private:
    int initialization(void);
    // Sub-functions for get_Next_Read_From_(cs)Fastq. Each takes the input
    // buffer and a caller-supplied char buffer and returns a bool.
    // NOTE(review): what the bool signals (success vs. end of input) and how
    // expLength / exp1stChar are checked is not visible in this header.
    inline bool getNextSeqNameInFq(FileInputBuffer* pBuf, char* caBuf);
    inline bool getNextSeqInFq(FileInputBuffer* pBuf, char* caBuf, unsigned int expLength);
    inline bool getNextLine(FileInputBuffer* pBuf, char* caBuf, const char exp1stChar);
    inline bool getNextQScoreInFq(FileInputBuffer* pBuf, char* caBuf, unsigned int expLength);
};
/*
//estimate number of read
unsigned int estimateNoOfReads(const char* fileName, const char* fileFormat);
unsigned int estimateNoOfReads_From_Fasta(const char* fileName);
unsigned int estimateNoOfReads_From_Fastq(const char* fileName);
*/
// Free functions for detecting the format of a reads file. They are only
// declared in this header; the definitions are elsewhere.

// NOTE(review): fileFormat is a non-const char*, so it is presumably an output
// buffer filled by the callee - confirm the size the caller must provide.
void getReadsFileFormat(const char* fileName, char* fileFormat);
// NOTE(review): both overloads return a single char, presumably a one-letter
// code for the detected format - confirm the set of codes in the definition.
char getReadsFileFormatSymbol(const char* InputFile, const char* fileFormat);
char getReadsFileFormatSymbol(const char* InputFile);
// fileFormat is optional and defaults to the empty string.
bool is_csFastq_format(const char* fileName, const char* fileFormat = "");
bool is_colorspace_reads(const char* fileName);

// Returns true when hasTheExtName reports that fileName has the ".csfastq"
// or the ".csfq" extension, and false otherwise. ".csfastq" is checked first;
// ".csfq" is only checked when that test fails.
inline bool hasCsfqExtName(const char* fileName)
{
    const bool bCsfqFormat = hasTheExtName(fileName, ".csfastq")
                             || hasTheExtName(fileName, ".csfq");
    return bCsfqFormat;
}

// Returns true when hasTheExtName reports that fileName has one of the
// extensions ".fq", ".fastq" or ".fastqsanger" (tested in that order, stopping
// at the first match), and false otherwise.
inline bool hasFqExtName(const char* fileName)
{
    if (hasTheExtName(fileName, ".fq")) {
        return true;
    }
    if (hasTheExtName(fileName, ".fastq")) {
        return true;
    }
    if (hasTheExtName(fileName, ".fastqsanger")) {
        return true;
    }
    return false;
}

#endif