File: bfc.h

package info (click to toggle)
fermi-lite 0.1-5
  • links: PTS, VCS
  • area: main
  • in suites: buster, sid
  • size: 652 kB
  • sloc: ansic: 5,157; makefile: 63; sh: 13
file content (153 lines) | stat: -rw-r--r-- 3,078 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#ifndef AC_BFC_H__
#define AC_BFC_H__

#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include "htab.h"
/* #include "kmer.h" ... this is actually included by htab.h */
#include "internal.h"
#include "fml.h"
#include "khash.h"

/* Andreas Tille <tille@debian.org>: Its not clear where jwala took this from and what its purpose might be - commenting out for the moment
#define _cnt_eq(a, b) ((a)>>14 == (b)>>14)
#define _cnt_hash(a) ((a)>>14)
KHASH_INIT(cnt, uint64_t, char, 0, _cnt_hash, _cnt_eq)
typedef khash_t(cnt) cnthash_t;

struct bfc_ch_s {
  int k;
  cnthash_t **h;
  // private
  int l_pre;
};
*/

typedef struct {
	int n_threads, q, k, l_pre;
	int min_cov; // a k-mer is considered solid if the count is no less than this

	int max_end_ext;
	int win_multi_ec;
	float min_trim_frac;

	// these ec options cannot be changed on the command line
	int w_ec, w_ec_high, w_absent, w_absent_high;
	int max_path_diff, max_heap;
} bfc_opt_t;

/**********************
 *** K-mer counting ***
 **********************/

#define CNT_BUF_SIZE 256

typedef struct { // cache to reduce locking
	uint64_t y[2];
	int is_high;
} insbuf_t;

typedef struct {
	int k, q;
	int n_seqs;
	const fml_seq1_t *seqs;
	bfc_ch_t *ch;
	int *n_buf;
	insbuf_t **buf;
} cnt_step_t;

/***************
 *** Correct ***
 ***************/

#define BFC_MAX_KMER     63
#define BFC_MAX_BF_SHIFT 37

#define BFC_MAX_PATHS 4
#define BFC_EC_HIST 5
#define BFC_EC_HIST_HIGH 2

#define BFC_EC_MIN_COV_COEF .1

/**************************
 * Sequence struct for ec *
 **************************/

#include "kvec.h"

typedef struct { // NOTE: unaligned memory
	uint8_t b:3, q:1, ob:3, oq:1;
	uint8_t dummy;
	uint16_t lcov:6, hcov:6, solid_end:1, high_end:1, ec:1, absent:1;
	int i;
} ecbase_t;

typedef kvec_t(ecbase_t) ecseq_t;

/********************
 * Correct one read *
 ********************/

#include "ksort.h"

#define ECCODE_MISC      1
#define ECCODE_MANY_N    2
#define ECCODE_NO_SOLID  3
#define ECCODE_UNCORR_N  4
#define ECCODE_MANY_FAIL 5

typedef struct {
	uint32_t ec_code:3, brute:1, n_ec:14, n_ec_high:14;
	uint32_t n_absent:24, max_heap:8;
} ecstat_t;

typedef struct {
	uint8_t ec:1, ec_high:1, absent:1, absent_high:1, b:4;
} bfc_penalty_t;

typedef struct {
	int tot_pen;
	int i; // base position
	int k; // position in the stack
	int32_t ecpos_high[BFC_EC_HIST_HIGH];
	int32_t ecpos[BFC_EC_HIST];
	bfc_kmer_t x;
} echeap1_t;

typedef struct {
	int parent, i, tot_pen;
	uint8_t b;
	bfc_penalty_t pen;
	uint16_t cnt;
} ecstack1_t;

typedef struct {
	const bfc_opt_t *opt;
	const bfc_ch_t *ch;
	kvec_t(echeap1_t) heap;
	kvec_t(ecstack1_t) stack;
	ecseq_t seq, tmp, ec[2];
	int mode;
	ecstat_t ori_st;
} bfc_ec1buf_t;

/********************
 * Error correction *
 ********************/

typedef struct {
	const bfc_opt_t *opt;
	const bfc_ch_t *ch;
	bfc_ec1buf_t **e;
	int64_t n_processed;
	int n_seqs, flt_uniq;
	fml_seq1_t *seqs;
} ec_step_t;

void kmer_correct(ec_step_t * es, int mode, bfc_ch_t * ch);
void bfc_opt_init(bfc_opt_t *opt);

#endif