File: mergectg.h

package info (click to toggle)
bio-rainbow 2.0.4%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 560 kB
  • sloc: ansic: 7,475; perl: 172; makefile: 129; sh: 49
file content (139 lines) | stat: -rw-r--r-- 3,753 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#ifndef MERGECTG_H
#define MERGECTG_H

#include <stdint.h>
#include "list.h"
#include "file_reader.h"
#include "hashset.h"
#include "string.h"
#include "stdaln.h"
#include "asm_R2.h"
#include "bloom_filter.h"

typedef struct {
	char seq[MAX_RD_LEN+1];
	uint32_t seq_id;
	uint32_t rd_len;
	uint32_t rank;
} read_t;

define_list(readv, read_t);

typedef struct {
	uint64_t kmer:62, kpos:2;
} rd_kmer_t;

#define rd_kmer_code(r) u32hashcode((r).kmer)
#define rd_kmer_eq(r1, r2) ((r1).kmer == (r2).kmer)
define_hashset(rdkhash, rd_kmer_t, rd_kmer_code, rd_kmer_eq);
define_list(idxv, rdkhash*);
//define_list(idxv, BloomFilter*);

typedef struct {
//	uint32_t id, clsid, old_clsid, sz;
	uint32_t id;
	int closed;
//	char *seq, *sec_seq;
	String *path;
	readv *rds;
	u32list *m_rds;  // merged reads index
	rdkhash *index;
//	BloomFilter *index;
	idxv *m_idx;  // merged multiple index
//	Vector *efctgs;
} contig_t; 

#define contig_code(c) u32hashcode((c).id)
#define contig_eq(c1, c2) ((c1).id == (c2).id)
define_hashset(ctgset, contig_t, contig_code, contig_eq);

typedef struct {
	uint32_t id;
	char *seq;
} contig_seq_t;

define_list(contigv, contig_t*);
define_list(contigsv, contig_t*);

typedef struct pathtree_t pathtree_t;
struct pathtree_t {
	uint32_t tid; // leaf records contig ID
	pathtree_t *left;
	pathtree_t *right;
};

typedef struct {
	uint64_t kmer, kpos;
	uint32_t id; // which contig
	int offset;  // offset w.r.t. the current contig
	int offset2; // offset of query contig
} ctg_kmer_t;

#define kmer_code(k) u64hashcode((k).kmer)
#define kmer_eq(k1, k2) ((k1).kmer == (k2).kmer)
define_hashset(ctgkhash, ctg_kmer_t, kmer_code, kmer_eq);

typedef struct {uint32_t key; uint32_t oldid; char *path;} uuchash_t;
#define uuchash_code(e) (e).key
#define uuchash_equals(e1, e2) ((e1).key == (e2).key)
define_hashset(uuchash, uuchash_t, uuchash_code, uuchash_equals);

define_list(ctgkmerv, ctg_kmer_t);

typedef struct {
	uint64_t last; //last kmer position
	int offset; // current kmer offset
} link_t;

define_search_array(bisearch, uint64_t, native_number_cmp);

typedef struct {
	contigv *ctgs;
	contigsv *cache;
	pathtree_t *tree;
	uint32_t min_kmer; // parameter: # kmers to define two similar contigs
	uint32_t min_overlap; // parameter
	float het; // parameter
	uint32_t RD_KMER_SIZE; // parameter
	uint32_t min_ol; //parameter for asm
	float min_sm; // parameter for asm
	uint32_t min_read; // parameter for asm
	uint32_t max_read; // parameter for asm
	uint32_t sim_pairs;
	uint32_t max_cluster; //parameter
	uint32_t need_asm; // parameter
	uint32_t cid; //
	EF *ef;
	int flag;  // if == 0 first use, init; else reset
} merge_t;


#ifdef __CPLUSPLUS
extern "C" {
#endif

merge_t* init_merger(uint32_t min_kmer, uint32_t min_overlap, float het, uint32_t kmersize, uint32_t max_cluster, uint32_t need_asm, float min_sm, uint32_t min_read, uint32_t max_read);
//void merge_ctgs(merge_t *merger, FileReader *asmd, FileReader *divd, FILE *out);
void merge_ctgs(merge_t *merger, FileReader *in, FILE *out);
void merge_along_tree(merge_t *merger, pathtree_t *tree);
void merge_core(merge_t *merger);
void free_index(merge_t *merger);
void free_ctg(contig_t *ctg);
void free_ctgs(merge_t *merger);
void build_tree(merge_t *merger);
void update_ctg2merge(merge_t *merger);
int is_similar_enough(merge_t *merger, contig_t *c1, contig_t *c2);
void merge_2ctg(merge_t *merger, contig_t *ctg1, contig_t *ctg2);
void update_merger(merge_t *merger, contig_t *ctg1, contig_t *ctg2);
void prefix_path(char *s1, char *s2, int n, char *pre);
void destroy_tree(pathtree_t *t);
void free_tree(merge_t *merger);
void reset_merger(merge_t *merger);
void free_merger(merge_t *merger);
void clear_ctg(contig_t *ctg);

#ifdef __CPLUSPLUS
}
#endif

#endif