File: SingleRead.h

package info (click to toggle)
rsem 1.3.3%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 37,668 kB
  • sloc: cpp: 19,230; perl: 1,326; python: 1,245; ansic: 547; makefile: 186; sh: 154
file content (92 lines) | stat: -rw-r--r-- 2,460 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#ifndef SINGLEREAD
#define SINGLEREAD

#include<cmath>
#include<cstdio>
#include<cstdlib>
#include<cassert>
#include<iostream>
#include<string>

#include "utils.h"
#include "Read.h"

class SingleRead : public Read {
public:
	SingleRead() { readseq = ""; len = 0; }
	SingleRead(const std::string& name, const std::string& readseq) {
		this->name = name;
		this->readseq = readseq;
		this->len = readseq.length();
	}

	bool read(int argc, std::istream* argv[], int flags = 7);
	void write(int argc, std::ostream* argv[]);

	const int getReadLength() const { return len; /*readseq.length();*/ } // If need memory and .length() are guaranteed O(1), use statement in /* */
	const std::string& getReadSeq() const { return readseq; }

	void calc_lq(bool, int); // calculate if this read is low quality. Without calling this function, isLowQuality() will always be false

private:
	int len; // read length
	std::string readseq; // read sequence
};

//If return false, you should not trust the value of any member
bool SingleRead::read(int argc, std::istream* argv[], int flags) {
	std::string line;

	assert(argc == 1);
	if (!getline((*argv[0]), line)) return false;
	if (line[0] != '>') { fprintf(stderr, "Read file does not look like a FASTA file!"); exit(-1); }
	name = "";
	if (flags & 4) { name = line.substr(1); }
	if (!getline((*argv[0]), readseq)) return false;
	len = readseq.length(); // set read length
	if (!(flags & 1)) { readseq = ""; }

	return true;
}

void SingleRead::write(int argc, std::ostream* argv[]) {
	assert(argc == 1);
	(*argv[0])<<">"<<name<<std::endl<<readseq<<std::endl;
}

//calculate if this read is low quality
void SingleRead::calc_lq(bool hasPolyA, int seedLen) {
	low_quality = false;
	if (len < seedLen) { low_quality = true; return; }

	// if no polyA, no need to do the following calculation
	if (!hasPolyA) return;

	assert(readseq != "");

	int numA = 0, numT = 0, numAO = 0, numTO = 0; // numAO : number of A in overlap seed region
	int threshold_1, threshold_2;

	threshold_1 = int(0.9 * len - 1.5 * sqrt(len * 1.0) + 0.5);
	threshold_2 = (OLEN - 1) / 2 + 1;
	for (int i = 0; i < len; i++) {
		if (readseq[i] == 'A') {
			++numA;
			if (i < OLEN) ++numAO;
		}
		if (readseq[i] == 'T') {
			++numT;
			if (i >= len - OLEN) ++numTO;
		}
	}
  
	if (numA >= threshold_1) {
		low_quality = (numAO >= threshold_2);
	}
	else if (numT >= threshold_1) {
		low_quality = (numTO >= threshold_2);
	}
	else low_quality = false;
}

#endif