File: getUnique.cpp

package info (click to toggle)
rsem 1.3.3%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 37,700 kB
  • sloc: cpp: 19,230; perl: 1,326; python: 1,245; ansic: 547; makefile: 186; sh: 154
file content (83 lines) | stat: -rw-r--r-- 1,741 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<cassert>
#include<string>
#include<vector>

#include <stdint.h>
#include "htslib/sam.h"
#include "sam_utils.h"

#include "utils.h"
#include "my_assert.h"

using namespace std;

int nThreads;
string cqname;
samFile *in, *out;
bam_hdr_t *header;
bam1_t *b;
vector<bam1_t*> arr;
bool unaligned;

void output() {
	if (unaligned || arr.size() == 0) return;
	bool isPaired = bam_is_paired(arr[0]);
	if ((isPaired && arr.size() != 2) || (!isPaired && arr.size() != 1)) return;
	for (size_t i = 0; i < arr.size(); ++i) sam_write1(out, header, arr[i]);
}

int main(int argc, char* argv[]) {
	if (argc != 4) {
		printf("Usage: rsem-get-unique number_of_threads unsorted_transcript_bam_input bam_output\n");
		exit(-1);
	}

        nThreads = atoi(argv[1]);
	in = sam_open(argv[2], "r");
	assert(in != 0);
	header = sam_hdr_read(in);
	assert(header != 0);
	out = sam_open(argv[3], "wb");
	assert(out != 0);
	sam_hdr_write(out, header);
	if (nThreads > 1) general_assert(hts_set_threads(out, nThreads) == 0, "Fail to create threads for writing the BAM file!");

	HIT_INT_TYPE cnt = 0;

	cqname = "";
	arr.clear();
	b = bam_init1();
	unaligned = false;

	while (sam_read1(in, header, b) >= 0) {
		if (cqname != bam_get_qname(b)) {
			output();
			cqname = bam_get_qname(b);
			for (size_t i = 0; i < arr.size(); ++i) bam_destroy1(arr[i]);
			arr.clear();
			unaligned = false;
		}

		unaligned = unaligned || bam_is_unmapped(b);
		arr.push_back(bam_dup1(b));

		++cnt;
		if (cnt % 1000000 == 0) { printf("."); fflush(stdout); }
	}

	if (cnt >= 1000000) printf("\n");

	output();

	bam_destroy1(b);
	bam_hdr_destroy(header);
	sam_close(in);
	sam_close(out);

	printf("done!\n");

	return 0;
}