1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
|
#ifndef CMAPPY_H
#define CMAPPY_H
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
#include "minimap.h"
#include "kseq.h"
KSEQ_DECLARE(gzFile)
typedef struct {
const char *ctg;
int32_t ctg_start, ctg_end;
int32_t qry_start, qry_end;
int32_t blen, mlen, NM, ctg_len;
uint8_t mapq, is_primary;
int8_t strand, trans_strand;
int32_t seg_id;
int32_t n_cigar32;
uint32_t *cigar32;
} mm_hitpy_t;
static inline void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
{
h->ctg = mi->seq[r->rid].name;
h->ctg_len = mi->seq[r->rid].len;
h->ctg_start = r->rs, h->ctg_end = r->re;
h->qry_start = r->qs, h->qry_end = r->qe;
h->strand = r->rev? -1 : 1;
h->mapq = r->mapq;
h->mlen = r->mlen;
h->blen = r->blen;
h->NM = r->blen - r->mlen + r->p->n_ambi;
h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? -1 : 0;
h->is_primary = (r->id == r->parent);
h->seg_id = r->seg_id;
h->n_cigar32 = r->p->n_cigar;
h->cigar32 = r->p->cigar;
}
static inline void mm_free_reg1(mm_reg1_t *r)
{
free(r->p);
}
static inline kseq_t *mm_fastx_open(const char *fn)
{
gzFile fp;
fp = fn && strcmp(fn, "-") != 0? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
return kseq_init(fp);
}
static inline void mm_fastx_close(kseq_t *ks)
{
gzFile fp;
fp = ks->f->f;
kseq_destroy(ks);
gzclose(fp);
}
static inline int mm_verbose_level(int v)
{
if (v >= 0) mm_verbose = v;
return mm_verbose;
}
static inline void mm_reset_timer(void)
{
extern double realtime(void);
mm_realtime0 = realtime();
}
extern unsigned char seq_comp_table[256];
static inline mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
{
mm_reg1_t *r;
Py_BEGIN_ALLOW_THREADS
if (seq2 == 0) {
r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, NULL);
} else {
int _n_regs[2];
mm_reg1_t *regs[2];
char *seq[2];
int i, len[2];
len[0] = strlen(seq1);
len[1] = strlen(seq2);
seq[0] = (char*)seq1;
seq[1] = strdup(seq2);
for (i = 0; i < len[1]>>1; ++i) {
int t = seq[1][len[1] - i - 1];
seq[1][len[1] - i - 1] = seq_comp_table[(uint8_t)seq[1][i]];
seq[1][i] = seq_comp_table[t];
}
if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]];
mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, NULL);
for (i = 0; i < _n_regs[1]; ++i)
regs[1][i].rev = !regs[1][i].rev;
*n_regs = _n_regs[0] + _n_regs[1];
regs[0] = (mm_reg1_t*)realloc(regs[0], sizeof(mm_reg1_t) * (*n_regs));
memcpy(®s[0][_n_regs[0]], regs[1], _n_regs[1] * sizeof(mm_reg1_t));
free(regs[1]);
r = regs[0];
}
Py_END_ALLOW_THREADS
return r;
}
static inline char *mappy_revcomp(int len, const uint8_t *seq)
{
int i;
char *rev;
rev = (char*)malloc(len + 1);
for (i = 0; i < len; ++i)
rev[len - i - 1] = seq_comp_table[seq[i]];
rev[len] = 0;
return rev;
}
static char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len)
{
int i, rid;
char *s;
*len = 0;
rid = mm_idx_name2id(mi, name);
if (rid < 0) return 0;
if ((uint32_t)st >= mi->seq[rid].len || st >= en) return 0;
if (en < 0 || (uint32_t)en > mi->seq[rid].len)
en = mi->seq[rid].len;
s = (char*)malloc(en - st + 1);
*len = mm_idx_getseq(mi, rid, st, en, (uint8_t*)s);
for (i = 0; i < *len; ++i)
s[i] = "ACGTN"[(uint8_t)s[i]];
s[*len] = 0;
return s;
}
static mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len)
{
const char *fake_name = "N/A";
char *s;
mm_idx_t *mi;
s = (char*)calloc(len + 1, 1);
memcpy(s, seq, len);
mi = mm_idx_str(w, k, is_hpc, bucket_bits, 1, (const char**)&s, (const char**)&fake_name);
free(s);
return mi;
}
#endif
|