File: rainbow.h

package info (click to toggle)
bio-rainbow 2.0.4+dfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 560 kB
  • sloc: ansic: 7,475; perl: 172; makefile: 129; sh: 49
file content (127 lines) | stat: -rw-r--r-- 3,000 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/*
 * 
 * Copyright (c) 2011, Jue Ruan <ruanjue@gmail.com>
 *
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
#ifndef __RAINBOW_RJ_H
#define __RAINBOW_RJ_H

#include <stdint.h>
#include <time.h>
#include <unistd.h>
#include "bitvec.h"
#include "hashset.h"
#include "list.h"
#include "sort.h"
#include "dna.h"
#include "file_reader.h"
#include "string.h"
//#include "mergecontig.h"
#include "mergectg.h"


//static uint32_t KMER_SIZE = 15;
//static uint32_t KMER_NUM = 6;
//#define KMER_NUM	6

/*
 * Key/value record stored in the `khash` index: two 32-bit k-mer words plus
 * a sequence id.
 *
 * NOTE(review): the precise encoding of kmer1/kmer2 is not visible in this
 * header; confirm against the indexing code.
 */
typedef struct {
	uint32_t kmer1;  /* first 32-bit k-mer word */
	uint32_t kmer2;  /* second 32-bit k-mer word */
	uint32_t seqid;  /* sequence id carried alongside the key */
} kmer_t;

/*
 * Hash and equality callbacks for kmer_t, and the `khash` hash-set type that
 * is instantiated from them.
 *
 * kmer_hashcode(k)     - places kmer1 in the upper 32 bits and kmer2 in the
 *                        lower 32 bits of a uint64_t, then feeds that value
 *                        to u64hashcode().
 * kmer_equals(k1, k2)  - non-zero when both kmer1 and kmer2 match.
 *
 * seqid takes part in neither the hash nor the comparison, so two records
 * that differ only in seqid count as the same key.
 *
 * Both macros expect struct values (they use `.`), not pointers, and each
 * argument is expanded twice, so arguments should be free of side effects.
 *
 * NOTE(review): u64hashcode and define_hashset are defined elsewhere
 * (presumably hashset.h); what define_hashset generates is not visible here.
 */
#define kmer_hashcode(k) u64hashcode((((uint64_t)(k).kmer1) << 32) | (k).kmer2)
#define kmer_equals(k1, k2) (((k1).kmer1 == (k2).kmer1) && ((k1).kmer2 == (k2).kmer2))
define_hashset(khash, kmer_t, kmer_hashcode, kmer_equals);

/*
 * Sequence store: a buffer of 64-bit words plus per-read bookkeeping lists.
 *
 * NOTE(review): field meanings below are inferred from the names only;
 * confirm against the code that fills this structure. Both length fields
 * are uint8_t, so they cannot hold values above 255.
 */
typedef struct {
	uint64_t *seqs;        /* 64-bit word buffer (presumably packed bases) */
	uint32_t  n_rd;        /* presumably the number of reads stored */
	uint8_t   rd_len;      /* presumably the (fixed) read length */
	uint8_t   max_rd_len;  /* presumably the longest read length seen */
	u64list  *seqoffs;     /* list of 64-bit values (presumably read offsets) */
	u8list   *seqlens;     /* list of 8-bit values (presumably read lengths) */
} SeqDB;

/*
 * Fixed-size record made of two 32-bit fields followed by a buffer of eight
 * 64-bit words.
 *
 * NOTE(review): `bt` and `len` are not explained in this header; presumably
 * `len` is the length of the sequence held in `seq` -- confirm in the
 * clustering code.
 */
typedef struct {
	uint32_t bt, len;
	uint64_t seq[8];  /* eight 64-bit words of payload */
} SBT;

/*
 * Instantiates the list type `sbtv` with SBT elements via the define_list
 * macro (defined elsewhere, presumably list.h).
 */
define_list(sbtv, SBT);

/*
 * State shared by the clustering step: two sequence stores, two scratch word
 * buffers, tuning parameters, the k-mer index and a set of work lists.
 *
 * NOTE(review): the per-field notes are inferred from names and types only;
 * confirm against the implementation of init_cluster(), indexing_cluster()
 * and clustering().
 */
typedef struct {
	SeqDB    *sdb;           /* first sequence store */
	SeqDB    *sdb2;          /* second sequence store */
	uint64_t  seq1[10];      /* scratch buffer of ten 64-bit words */
	uint64_t  seq2[10];      /* scratch buffer of ten 64-bit words */
	uint32_t  gidoff;        /* presumably an offset applied to group ids */
	uint32_t  max_seqid;
	uint32_t  max_pair_len;
	uint32_t  max_mm;        /* same name as init_cluster()'s max_mm argument */
	uint32_t  exact_limit;   /* same name as init_cluster()'s exact_limit argument */
	uint32_t  idxs[2];
	uint32_t  KMER_SIZE;     /* same name as init_cluster()'s KMER_SIZE argument */
	uint32_t  KMER_NUM;      /* same name as init_cluster()'s KMER_NUM argument */
	khash    *index;         /* hash set of kmer_t records */
	u32list  *links;
	BitVec   *flags;
	u32list  *gid_map;       /* an earlier revision declared this as `uuhash *` */
	u32list  *gids;
	u32list  *bts;
	sbtv     *sbts;          /* list of SBT records */
} Cluster;

/*
 * Cluster API (implemented elsewhere).
 *
 * NOTE(review): the descriptions below are inferred from the signatures and
 * names only; ownership and error behaviour are not visible in this header.
 */

/*
 * Creates a Cluster. The four arguments share their names with fields of
 * Cluster, so they are presumably stored there.
 */
Cluster *init_cluster(uint32_t max_mm,
		uint32_t exact_limit,
		uint32_t KMER_SIZE,
		uint32_t KMER_NUM);

/*
 * Presumably reads sequences from `fr1` and builds the cluster's index.
 * `is_fq` presumably selects FASTQ input and `fix_rd_len` a fixed read
 * length -- confirm.
 */
void indexing_cluster(Cluster *cluster,
		FileReader *fr1,
		int is_fq,
		int fix_rd_len);

/*
 * Presumably reads sequences from `fr2`, clusters them and writes the result
 * to `out`. The flags mirror those of indexing_cluster().
 */
void clustering(Cluster *cluster,
		FileReader *fr2,
		int is_fq,
		int fix_rd_len,
		FILE *out);

/* Presumably releases a Cluster obtained from init_cluster(). */
void free_cluster(Cluster *cluster);

/*
 * Per-read record: two full 32-bit fields followed by four bit-fields that
 * together occupy 32 bits (10 + 10 + 6 + 6).
 *
 * The bit-field widths bound the storable values: seqlen1/seqlen2 hold
 * 0..1023, rank/revsed hold 0..63.
 *
 * NOTE(review): `revsed` is presumably a misspelling of "reversed"; the name
 * is part of the public layout and is kept as is.
 */
typedef struct {
	uint32_t seqid;
	uint32_t seqoff;
	uint32_t seqlen1 : 10;
	uint32_t seqlen2 : 10;
	uint32_t rank    : 6;
	uint32_t revsed  : 6;
} ReadInfo;

/*
 * Instantiates the list type `rilist` with ReadInfo elements via the
 * define_list macro (defined elsewhere, presumably list.h).
 */
define_list(rilist, ReadInfo);

/*
 * Instantiates the list type `u32slist` whose elements are `u32list *`,
 * i.e. a list of pointers to u32 lists.
 */
define_list(u32slist, u32list*);

/*
 * Triple of 32-bit values: a column, a count and a base.
 *
 * NOTE(review): presumably "base `base` was seen `cnt` times in column
 * `col`" -- confirm against the dividing code.
 */
typedef struct {
	uint32_t col;
	uint32_t cnt;
	uint32_t base;
} col_base_t;

/*
 * Instantiates the list type `cbv` with col_base_t elements via the
 * define_list macro (defined elsewhere, presumably list.h).
 */
define_list(cbv, col_base_t);

/*
 * State for the dividing step: read records, sequence bytes, group lists,
 * four marker lists and the tuning parameters passed to init_div().
 *
 * NOTE(review): the per-field notes are inferred from names and types only;
 * confirm against the implementation of init_div(), div_reads() and
 * reset_div().
 */
typedef struct {
	uint32_t  gidoff;      /* presumably an offset applied to group ids */
	rilist   *rds;         /* list of ReadInfo records */
	u8list   *seqs;        /* list of 8-bit values (presumably sequence bytes) */
	u32slist *grps;        /* list of u32list pointers */
	u32slist *cache;       /* list of u32list pointers */
	u64list  *markers[4];  /* four lists of 64-bit values */
	u32list  *deps;
	u32list  *gids;
	cbv      *cbs;         /* list of col_base_t records */
	u32list  *ps1;
	u32list  *ps2;
	uint32_t  n_col;       /* presumably the number of columns */
	uint32_t  k_allele;    /* same name as init_div()'s k_allele argument */
	uint32_t  K_allele;    /* same name as init_div()'s K_allele argument */
	float     min_freq;    /* same name as init_div()'s min_freq argument */
} Div;

/*
 * Div API (implemented elsewhere).
 *
 * NOTE(review): the descriptions below are inferred from the signatures and
 * names only; ownership and error behaviour are not visible in this header.
 */

/*
 * Creates a Div. The three arguments share their names with fields of Div,
 * so they are presumably stored there.
 */
Div *init_div(uint32_t k_allele,
		uint32_t K_allele,
		float min_freq);

/*
 * Presumably reads input from `fr`, divides it and writes the result to
 * `out`. The meaning of the uint32_t return value is not visible here.
 */
uint32_t div_reads(Div *div,
		FileReader *fr,
		FILE *out);

/* Presumably clears a Div's working state so it can be reused. */
void reset_div(Div *div);

/* Presumably releases a Div obtained from init_div(). */
void free_div(Div *div);


#endif