1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
|
#ifndef BWAMEM_STATIC_FUNCS_HPP
#define BWAMEM_STATIC_FUNCS_HPP
extern unsigned char nst_nt4_table[256];
char const* bwa_pg = "cha";
/******* STUFF THAT IS STATIC IN BWAMEM THAT WE NEED HERE --- Just re-define it *************/
#define intv_lt(a, b) ((a).info < (b).info)
KSORT_INIT(mem_intv, bwtintv_t, intv_lt)
typedef struct {
bwtintv_v mem, mem1, *tmpv[2];
} smem_aux_t;
static smem_aux_t *smem_aux_init()
{
smem_aux_t *a;
a = static_cast<smem_aux_t*>(calloc(1, sizeof(smem_aux_t)));
a->tmpv[0] = static_cast<bwtintv_v*>(calloc(1, sizeof(bwtintv_v)));
a->tmpv[1] = static_cast<bwtintv_v*>(calloc(1, sizeof(bwtintv_v)));
return a;
}
static void smem_aux_destroy(smem_aux_t *a)
{
free(a->tmpv[0]->a); free(a->tmpv[0]);
free(a->tmpv[1]->a); free(a->tmpv[1]);
free(a->mem.a); free(a->mem1.a);
free(a);
}
static void mem_collect_intv(const SalmonOpts& sopt, const mem_opt_t *opt, SalmonIndex* sidx, int len, const uint8_t *seq, smem_aux_t *a)
{
const bwt_t* bwt = sidx->bwaIndex()->bwt;
int i, k, x = 0, old_n;
int start_width = (opt->flag & MEM_F_SELF_OVLP)? 2 : 1;
int split_len = (int)(opt->min_seed_len * opt->split_factor + .499);
a->mem.n = 0;
// first pass: find all SMEMs
if (sidx->hasAuxKmerIndex()) {
KmerIntervalMap& auxIdx = sidx->auxIndex();
uint32_t klen = auxIdx.k();
while (x < len) {
if (seq[x] < 4) {
// Make sure there are at least k bases left
if (len - x < klen) { x = len; continue; }
// search for this key in the auxiliary index
KmerKey kmer(const_cast<uint8_t*>(&(seq[x])), klen);
auto it = auxIdx.find(kmer);
// if we can't find it, move to the next key
if (it == auxIdx.end()) { ++x; continue; }
// otherwise, start the search using the initial interval @it->second from the hash
int xb = x;
x = bwautils::bwt_smem1_with_kmer(bwt, len, seq, x, start_width, it->second, &a->mem1, a->tmpv);
for (i = 0; i < a->mem1.n; ++i) {
bwtintv_t *p = &a->mem1.a[i];
int slen = (uint32_t)p->info - (p->info>>32); // seed length
if (slen >= opt->min_seed_len)
kv_push(bwtintv_t, a->mem, *p);
}
} else ++x;
}
} else {
while (x < len) {
if (seq[x] < 4) {
x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv);
for (i = 0; i < a->mem1.n; ++i) {
bwtintv_t *p = &a->mem1.a[i];
int slen = (uint32_t)p->info - (p->info>>32); // seed length
if (slen >= opt->min_seed_len)
kv_push(bwtintv_t, a->mem, *p);
}
} else ++x;
}
}
// For sensitive / extra-sensitive mode only
if (sopt.sensitive or sopt.extraSeedPass) {
// second pass: find MEMs inside a long SMEM
old_n = a->mem.n;
for (k = 0; k < old_n; ++k) {
bwtintv_t *p = &a->mem.a[k];
int start = p->info>>32, end = (int32_t)p->info;
if (end - start < split_len || p->x[2] > opt->split_width) continue;
//int idx = (start + end) >> 1;
bwt_smem1(bwt, len, seq, (start + end)>>1, p->x[2]+1, &a->mem1, a->tmpv);
for (i = 0; i < a->mem1.n; ++i)
if ((uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info>>32) >= opt->min_seed_len)
kv_push(bwtintv_t, a->mem, a->mem1.a[i]);
}
}
// For extra-sensitive mode only
// third pass: LAST-like
if (sopt.extraSeedPass and opt->max_mem_intv > 0) {
x = 0;
while (x < len) {
if (seq[x] < 4) {
if (1) {
bwtintv_t m;
x = bwt_seed_strategy1(bwt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m);
if (m.x[2] > 0) kv_push(bwtintv_t, a->mem, m);
} else { // for now, we never come to this block which is slower
x = bwt_smem1a(bwt, len, seq, x, start_width, opt->max_mem_intv, &a->mem1, a->tmpv);
for (i = 0; i < a->mem1.n; ++i)
kv_push(bwtintv_t, a->mem, a->mem1.a[i]);
}
} else ++x;
}
}
// sort
// ks_introsort(mem_intv, a->mem.n, a->mem.a);
}
/******* END OF STUFF THAT IS STATIC IN BWAMEM THAT WE NEED HERE --- Just re-define it *************/
#endif // BWAMEM_STATIC_FUNCS_HPP
|