1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
|
#include <stdlib.h>
#include <zlib.h>
#include <assert.h>
#include "maqmap.h"
#include "assemble.h"
/** fill the rolling buffer */
inline void assemble_fill_buffer(gzFile fpin, rolling_buf_t *ab)
{
if (gzeof(fpin) || (ab->is_rounded == 1 && ab->head == ab->tail)) return; // the buffer is full
int n_records, n_bytes;
if (ab->tail >= ab->head) {
n_records = ROLLING_BUF_SIZE - ab->tail;
n_bytes = gzread(fpin, ab->buf + ab->tail, sizeof(maqmap1_t) * n_records);
ab->tail += n_bytes / sizeof(maqmap1_t);
if ((bit32_t)n_bytes < sizeof(maqmap1_t) * n_records) return;
ab->is_rounded = 1; ab->tail = 0;
}
if (ab->head == 0) return;
n_records = ab->head - ab->tail;
n_bytes = gzread(fpin, ab->buf + ab->tail, sizeof(maqmap1_t) * n_records);
ab->tail += n_bytes / sizeof(maqmap1_t);
}
/** get the piled bases at a specified position */
void assemble_get_pos(bit32_t seqid, bit32_t pos, gzFile fpin, rolling_buf_t *ab, assemble_pos_t *ap,
int max_mm, int max_err, int min_q, int is_single, int is_pair_only)
{
int i, is_rounded;
if (max_err < 0) max_err = 60; // default value is set here.
if (max_mm < 0) max_mm = 7; // default value
ap->n_bases = 0;
do {
assemble_fill_buffer(fpin, ab);
if (gzeof(fpin) && ab->is_rounded == 0 && ab->head == ab->tail) return; // end of file and empty buffer
// skip reads that are out of range.
for (i = ab->head, is_rounded = ab->is_rounded; (is_rounded || i != ab->tail)
&& (seqid > ab->buf[i].seqid || (seqid == ab->buf[i].seqid && pos >= (ab->buf[i].pos>>1) + ab->buf[i].size));)
{
if ((++i) == ROLLING_BUF_SIZE) { i = 0; is_rounded = 0; }
}
ab->head = i; ab->is_rounded = is_rounded; // update ab
} while (ab->is_rounded == 0 && ab->head == ab->tail);
assemble_fill_buffer(fpin, ab);
is_rounded = ab->is_rounded;
// fill ap
while ((is_rounded || i != ab->tail) && seqid == ab->buf[i].seqid && pos >= ab->buf[i].pos>>1) {
maqmap1_t *m1 = ab->buf + i;
bit32_t map_qual = is_single? m1->seq[MAX_READLEN-1] : m1->map_qual;
int is_pair_rm = (is_pair_only && (m1->flag != 0 && m1->flag != (PAIRFLAG_FR|PAIRFLAG_PAIRED)))? 1 : 0;
if (m1->info2 <= max_err && (m1->info1&0xf) <= max_mm && map_qual >= (bit32_t)min_q && pos < (m1->pos>>1) + m1->size
&& (m1->flag&PAIRFLAG_SW) == 0 && !is_pair_rm)
{
if (ap->n_bases == ap->m_bases) {
ap->m_bases += 0x1000;
ap->bases = (assemble_posinfo_t*)realloc(ap->bases, ap->m_bases * sizeof(assemble_posinfo_t));
}
assemble_posinfo_t *p = ap->bases + ap->n_bases;
bit32_t base_qual = m1->seq[pos - (m1->pos>>1)]&0x3f;
bit32_t base = m1->seq[pos - (m1->pos>>1)]>>6&0x3;
bit32_t qual = (base_qual < map_qual)? base_qual : map_qual;
bit32_t is_present = (m1->seq[pos - (m1->pos>>1)] == 0)? 0 : 1;
bit32_t mm = m1->info1>>4&0xf;
if (qual < 1) qual = 1;
bit32_t strand = m1->pos&1;
p->info = (qual<<24) | (is_present<<22) | (mm<<19) | (strand<<18) | (base<<16) | (base_qual<<8) | map_qual;
p->c[0] = m1->c[0]; p->c[1] = m1->c[1];
p->pos = (m1->pos&1)? m1->size - 1 - (pos - (m1->pos>>1)) : pos - (m1->pos>>1);
++ap->n_bases;
}
if ((++i) == ROLLING_BUF_SIZE) { i = 0; is_rounded = 0; }
}
}
void assemble_get_indelpos(bit32_t seqid, bit32_t pos, gzFile fpin, rolling_buf_t *ab, assemble_indelpos_t *ai)
{
int i, is_rounded;
// initialization
ai->n_reads = ai->n_types = ai->n_ungap = 0;
memset(ai->ins_bases[0], 0, sizeof(int) * 4 * MAX_READLEN);
do {
assemble_fill_buffer(fpin, ab);
if (gzeof(fpin) && ab->is_rounded == 0 && ab->head == ab->tail) return; // end of file and empty buffer
// skip reads that are out of range.
for (i = ab->head, is_rounded = ab->is_rounded; (is_rounded || i != ab->tail)
&& (seqid > ab->buf[i].seqid || (seqid == ab->buf[i].seqid && pos >= (ab->buf[i].pos>>1) + ab->buf[i].size));)
{
if ((++i) == ROLLING_BUF_SIZE) { i = 0; is_rounded = 0; }
}
ab->head = i; ab->is_rounded = is_rounded; // update ab
} while (ab->is_rounded == 0 && ab->head == ab->tail);
assemble_fill_buffer(fpin, ab);
is_rounded = ab->is_rounded;
// fill ai
while ((is_rounded || i != ab->tail) && seqid == ab->buf[i].seqid && pos >= ab->buf[i].pos>>1) {
maqmap1_t *m1 = ab->buf + i;
if (pos < (m1->pos>>1) + m1->size) {
++ai->n_reads;
if ((m1->flag & PAIRFLAG_SW) && (signed char)m1->seq[MAX_READLEN-1] != 0) { // indel
if (pos == (m1->pos>>1) + m1->map_qual) {
int j;
bit8_t b = (bit8_t)m1->seq[MAX_READLEN-1]; // force to be positive
for (j = 0; j != ai->n_types; ++j)
if (b == (ai->indels[j]&0xff)) break;
assert(j < 256);
if (j < ai->n_types) { // exist
if (m1->pos&1) {
ai->indels[j] += 1ull<<48;
ai->indels[j] += 1ull<<16;
} else {
ai->indels[j] += 1ull<<48;
ai->indels[j] += 1ull<<32;
}
} else { // new type
ai->indels[j] = 1ull<<48 | b | ((m1->pos&1)? 1ull<<16 : 1ull<<32);
++ai->n_types;
}
if ((signed char)b > 0) { // insertion, then get inserted bases
for (j = 0; j < b; ++j)
++ai->ins_bases[j][m1->seq[j+m1->map_qual]>>6&3];
}
}
} else { // non-indel
if (pos > (m1->pos>>1) && ai->n_ungap < 255) {
ai->n_mm[ai->n_ungap] = m1->info1&0xf;
ai->beg_pos[ai->n_ungap] = pos - (m1->pos>>1);
ai->end_pos[ai->n_ungap++] = m1->size - (pos - (m1->pos>>1));
}
}
}
if ((++i) == ROLLING_BUF_SIZE) { i = 0; is_rounded = 0; }
}
}
|