File: indel_call.cc

package info (click to toggle)
maq 0.7.1-7
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 1,476 kB
  • ctags: 1,130
  • sloc: cpp: 5,025; ansic: 3,333; sh: 3,303; perl: 2,547; makefile: 31
file content (128 lines) | stat: -rw-r--r-- 3,777 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <zlib.h>
#include <stdlib.h>
#include <stdio.h>
#include "maqmap.h"
#include "assemble.h"
#include "bfa.h"
#include "stdhash.hh"
#include "algo.hh"
#include "main.h"

#define MIN_INDEL_DIST 10

typedef struct
{
	int coor, score;
	assemble_indelpos_t indelpos;
	int type;
} indel_info_t;

static inline void print_indelpos(const char *name, indel_info_t *ii, nst_bfa1_t *l)
{
	if (ii->indelpos.n_types == 0) return;
	bit64_t x = ii->indelpos.indels[ii->indelpos.n_types-1];
	char indel_bases[MAX_READLEN];
	if (ii->type != 'x') {
		// generate reference sequence
		char s0[33], s1[33];
		int i, j;
		int l_indel = (signed char)(x&0xff);
		for (j = 0, i = (ii->coor > 32)? ii->coor - 32 : 0; i < ii->coor; ++i)
			s0[j++] = (l->mask[i>>5]>>((31-(i&0x1f))<<1)&3)?
				"ACGT"[l->seq[i>>5]>>((31-(i&0x1f))<<1)&3] : 'N';
		s0[j] = 0;
		for (i = ii->coor, j = 0; i < l->ori_len && i < ii->coor + 32; ++i)
			s1[j++] = (l->mask[i>>5]>>((31-(i&0x1f))<<1)&3)?
				"ACGT"[l->seq[i>>5]>>((31-(i&0x1f))<<1)&3] : 'N';
		s1[j] = 0;
		// generate inserted bases
		if (l_indel > 0) {
			for (i = 0; i != l_indel; ++i) {
				int max = -1, max_j = -1;
				for (j = 0; j != 4; ++j) {
					if (ii->indelpos.ins_bases[i][j] > max) {
						max = ii->indelpos.ins_bases[i][j];
						max_j = j;
					}
				}
				if (max == 0) break; // stop
				indel_bases[i] = "ACGT"[max_j];
			}
			indel_bases[i] = 0;
		} else {
			for (i = 0; i < -l_indel; ++i) {
				if (s1[i]) indel_bases[i] = s1[i];
				else break;
			}
			indel_bases[i] = 0;
		}
		// print
		printf("%s\t%d\t%c\t%d\t%d:%s\t%d\t%d\t%s\t%s\t%d\t@", name, ii->coor+1, ii->type, ii->indelpos.n_reads,
			   int((signed char)(x&0xff)), indel_bases, int(x>>16&0xffff), int(x>>32&0xffff), s0, s1, ii->indelpos.n_ungap);
		for (i = 0; i != ii->indelpos.n_ungap; ++i)
			printf("%d,", ii->indelpos.beg_pos[i]);
		printf("\t@");
		for (i = 0; i != ii->indelpos.n_ungap; ++i)
			printf("%d,", ii->indelpos.end_pos[i]);
		printf("\t@");
		for (i = 0; i != ii->indelpos.n_ungap; ++i)
			printf("%d,", ii->indelpos.n_mm[i]);
		printf("\n");
	}
}
int maq_indelpe(int argc, char *argv[])
{
	if (argc < 3) {
		fprintf(stderr, "Usage: maq indelpe <in.ref.bfa> <in.aln.map>\n");
		return 1;
	}
	FILE *fp_bfa = fopen(argv[1], "r");
	gzFile fp_map = gzopen(argv[2], "r");
	rolling_buf_t *buf = (rolling_buf_t*)calloc(1, sizeof(rolling_buf_t));
	maqmap_t *mm = maqmap_read_header(fp_map);
	hash_map_char<int> *hash_map = new hash_map_char<int>;
	nst_bfa1_t *l;
	int k, seqid;
	indel_info_t curr, last;

	buf->buf = (maqmap1_t*)calloc(ROLLING_BUF_SIZE, sizeof(maqmap1_t));
	for (k = 0; k != mm->n_ref; ++k) hash_map->insert(mm->ref_name[k], k);
	while ((l = nst_load_bfa1(fp_bfa)) != 0) {
		if (!hash_map->find(l->name, &seqid)) {
			nst_delete_bfa1(l);
			continue;
		}
		last.type = 'x'; // not set
		for (int i = 0; i != l->len; ++i) {
			for (int j = 31; j >= 0; --j) {
				int coor = (i<<5) | (31 - j);
				if (coor >= l->ori_len) break;
				assemble_get_indelpos(seqid, coor, fp_map, buf, &curr.indelpos);
				if (curr.indelpos.n_types == 0) continue; // no potential indels
				int n = curr.indelpos.n_types;
				bit64_t *p = curr.indelpos.indels;
				algo_sort(n, p);
				curr.score = p[n-1]>>48;
				curr.type = '-';
				if (p[n-1]>>48 >= 2) curr.type = '+';
				if ((p[n-1]>>16&0xffff) && (p[n-1]>>32&0xffff)) curr.type = '*';
				curr.coor = coor;
				if (last.type != 'x') {
					if (curr.coor - last.coor < MIN_INDEL_DIST) {
						if (last.score < curr.score) last.type = '.';
						else curr.type = '.';
					}
				}
				print_indelpos(l->name, &last, l);
				memcpy(&last, &curr, sizeof(indel_info_t));
			}
		}
		print_indelpos(l->name, &last, l);
		nst_delete_bfa1(l);
	}
	delete hash_map;
	gzclose(fp_map);
	fclose(fp_bfa);
	return 0;
}