1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
|
#ifndef BAMITER_H
#define BAMITER_H
/*
*
* bamiter.h
* alignment representation
*
*
*/
#include <stdint.h>
#include <inttypes.h>
#include <pthread.h>
#include "filebuffer.h"
#include "htslib/sam.h"
#include "htslib/faidx.h"
#include "htslib/kstring.h"
#include "htslib/khash.h"
#include "samio.h"
#include "intervaltree.h"
#define BAM_ITR_BUFSZ 1000
typedef struct{
uint32_t n;
uint32_t c; //massive padding for better alignment
uint64_t *d;
} bam_cs_data_t;
typedef struct{
uint8_t prev;
uint8_t next;
uint32_t tid;
uint32_t beg;
uint32_t n;
bam_cs_data_t *x;
} bam_cs_t;
/*
* lower -> higher bits
* 0: 4-bit : char encoding
* 4: 1-bit : rc
* 5: 8-bit : nucleotide qual (64)
* 13: 8-bit : mapping qual (64)
* 21: 8-bit : mm/nh
* 29: 2-bit : paired
* 31: 2-bit : conversion protocol
* 33: 16-bit: query position
* 49: 15-bit: number mismatches/nm
*/
#define BAM_CS_NT_MASK ((uint64_t)((1<<4)-1))
#define BAM_CS_RC_MASK ((uint64_t)((1<<1)-1))
#define BAM_CS_NQ_MASK ((uint64_t)((1<<8)-1))
#define BAM_CS_MQ_MASK (((uint64_t)(1<<8)-1))
#define BAM_CS_MM_MASK (((uint64_t)(1<<8)-1))
#define BAM_CS_PP_MASK (((uint64_t)(1<<2)-1))
#define BAM_CS_CP_MASK (((uint64_t)(1<<2)-1))
#define BAM_CS_QP_MASK (((uint64_t)(1<<16)-1))
#define BAM_CS_NM_MASK (((uint64_t)(1<<15)-1))
//#define BAM_CS_RS_MASK (1<<2)-1
#define BAM_CS_NT_LBIT 0
#define BAM_CS_RC_LBIT 4
#define BAM_CS_NQ_LBIT 5
#define BAM_CS_MQ_LBIT 13
#define BAM_CS_MM_LBIT 21
#define BAM_CS_PP_LBIT 29
#define BAM_CS_CP_LBIT 31
#define BAM_CS_QP_LBIT 33
#define BAM_CS_NM_LBIT 49
//#define BAM_CS_RS_LBIT 62
/*
* encoding of common nucleotides
*/
#define BAM_CS_NT_A 0x1
#define BAM_CS_NT_C 0x2
#define BAM_CS_NT_G 0x4
#define BAM_CS_NT_T 0x8
#define BAM_CS_NT_N 0xF
#define BAM_CS_NT_D 0x0
#define BAM_CS_DEFAULT_REG 1000000
#define BAM_CS_DEFAULT_COVERAGE 30
#define BAM_CS_DEFAULT_MAX_COVERAGE 10000
#define BAM_CS_DEFAULT_CIRCBUF_SZ 10000000
#define METHYL_FWD_STRAND 1
#define METHYL_REV_STRAND 2
#define METHYL_BTH_STRAND 0
//char cop = (char) bam_cigar_opchr(cigar[i]);
//fprintf(stderr, "%c", "=ACMGRSVTWYHKDBN"[bam_seqi(seq, qpos+j)]);
#define BAM_CS_GET_NT(val) (((val) & BAM_CS_NT_MASK) )
#define BAM_CS_GET_RC(val) (((val) >> BAM_CS_RC_LBIT) & BAM_CS_RC_MASK)
#define BAM_CS_GET_NQ(val) (((val) >> BAM_CS_NQ_LBIT) & BAM_CS_NQ_MASK)
#define BAM_CS_GET_MQ(val) (((val) >> BAM_CS_MQ_LBIT) & BAM_CS_MQ_MASK)
#define BAM_CS_GET_MM(val) (((val) >> BAM_CS_MM_LBIT) & BAM_CS_MM_MASK)
#define BAM_CS_GET_PP(val) (((val) >> BAM_CS_PP_LBIT) & BAM_CS_PP_MASK)
#define BAM_CS_GET_CP(val) (((val) >> BAM_CS_CP_LBIT) & BAM_CS_CP_MASK)
#define BAM_CS_GET_QP(val) (((val) >> BAM_CS_QP_LBIT) & BAM_CS_QP_MASK)
#define BAM_CS_GET_NM(val) (((val) >> BAM_CS_NM_LBIT) & BAM_CS_NM_MASK)
#define BAM_CS_GET_RS(val) (((val) >> BAM_CS_RS_LBIT) & BAM_CS_RS_MASK)
typedef struct{
bam_hdr_t *hdr;
uint32_t last_tid;
uint32_t last_beg;
uint32_t last_len;
uint8_t isthreaded;
pthread_mutex_t *mtx;
} bl_bam_methyl_master_t;
typedef struct {
FILE *out;
hts_idx_t *idx;
bam_hdr_t *hdr;
samFile *in;
faidx_t *fai;
intervalforest_t *forest;
uint8_t uniqueonly;
uint8_t isthreaded;
bl_bam_methyl_master_t *ms;
pthread_mutex_t *devmtx;
pthread_mutex_t *faimtx;
pthread_mutex_t *bammtx;
circbuffer_t *cb;
} bl_bam_methyl_worker_t;
typedef struct {
samFile *in;
faidx_t *fai;
hts_idx_t *idx;
bam_hdr_t *hdr;
} bam_info_t;
typedef struct {
uint16_t n;
uint16_t i;
uint16_t maxn;
uint8_t next;
bam1_t *d;
uint8_t isthreaded;
pthread_mutex_t *mtx;
} bl_bam1_buffer_t;
bam_info_t* bl_bamLoadInfo(bam_info_t *nfo, char *bamfn, char *fafn);
bl_bam_methyl_master_t* bl_bamInitMaster(bl_bam_methyl_master_t *ms,
bam_hdr_t *hdr, uint8_t isthreaded, pthread_mutex_t *mtx);
bl_bam_methyl_worker_t* bl_bamInitWorker(bl_bam_methyl_worker_t *wk,
bl_bam_methyl_master_t *ms, FILE *out, bam_info_t *nfo, uint8_t uniqueonly,
uint8_t isthreaded, pthread_mutex_t* devmtx, pthread_mutex_t *bammtx,
circbuffer_t *cb, intervalforest_t *forest);
void* bl_bamCrossSectioMethylWorker(void *args);
void* bl_bamMethylStringWorker(void *args);
void bl_bamDestructInfo(bam_info_t *nfo);
void bl_bamPrintBamrec (htsFile *fp, samrec_t *rec, bam_hdr_t *hdr, pthread_mutex_t *mtx) ;
htsFile* bl_bamOpenFile(char *fn);
bam_hdr_t* bl_bamGetHeader (samheader_t *head, Uint binno);
#endif
|