File: Kmer.h

package info (click to toggle)
mapsembler2 2.2.4%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 8,208 kB
  • sloc: cpp: 51,300; ansic: 13,434; sh: 483; makefile: 394; asm: 271; python: 28
file content (84 lines) | stat: -rw-r--r-- 3,100 bytes parent folder | download | duplicates (11)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#ifndef Kmer64_h
#define Kmer64_h

#include <stdint.h>

// Select the integer type used to hold an encoded k-mer:
//   _largeint defined -> LargeInt<KMER_PRECISION>
//   _ttmath defined   -> ttmath::UInt<KMER_PRECISION>
//   otherwise         -> uint64_t, except when kmer_type is already defined
//                        as a preprocessor macro AND _LP64 is defined; in that
//                        case no typedef is emitted here.
// KMER_PRECISION is not defined in this header's own text.
#if defined(_largeint)
#  include "LargeInt.h"
typedef LargeInt<KMER_PRECISION> kmer_type;
#elif defined(_ttmath)
#  include "ttmath/ttmath.h"
typedef ttmath::UInt<KMER_PRECISION> kmer_type;
#elif !defined(kmer_type) || !defined(_LP64)
typedef uint64_t kmer_type;
#endif

// Globals used by the k-mer routines. They are only declared here (extern);
// their definitions live in another translation unit.
// NOTE(review): the names suggest the k-mer length and bit masks applied to an
// encoded k-mer -- confirm against the defining source file.
extern int sizeKmer;
extern kmer_type kmerMask;
extern kmer_type kmerMaskm1;

// NOTE(review): presumably a count of "solid" k-mers -- confirm where it is defined.
extern uint64_t nsolids;

// Nucleotide and seed encoding routines (declarations only; the bodies are
// not part of this header).
// Several functions come in two overloads: one taking explicit
// sizeKmer/kmerMask parameters (named like the extern globals declared in this
// header) and one without them.
// NOTE(review): presumably the shorter overloads use the global sizeKmer and
// kmerMask -- confirm in the implementation.

// NOTE(review): presumably maps a nucleotide character to its integer code -- confirm.
int NT2int(char nt);
// NOTE(review): presumably returns the code of the complementary nucleotide -- confirm.
int revcomp_int(int nt_int);
// NOTE(review): presumably encodes the k-mer starting at seq into a kmer_type -- confirm.
kmer_type  codeSeed(char *seq, int sizeKmer, kmer_type kmerMask);
kmer_type  codeSeed(char *seq);
// NOTE(review): presumably updates val_seed incrementally with the next
// nucleotide of seq, re-encoding from scratch when new_read is true -- confirm.
kmer_type  codeSeedRight(char *seq, kmer_type  val_seed, bool new_read);
kmer_type  codeSeedRight(char *seq, kmer_type  val_seed, bool new_read, int sizeKmer, kmer_type kmerMask);
// NOTE(review): presumably the reverse-complement counterpart of codeSeedRight -- confirm.
kmer_type  codeSeedRight_revcomp(char *seq, kmer_type  val_seed, bool new_read);
kmer_type  codeSeedRight_revcomp(char *seq, kmer_type  val_seed, bool new_read, int sizeKmer, kmer_type kmerMask);
// NOTE(review): presumably pack nb (resp. 4) nucleotides of seq into one byte -- confirm.
unsigned char  code_n_NT(char *seq, int nb);
unsigned char  code4NT(char *seq);

// revcomp overload set, one pair of overloads per integer type that can hold
// an encoded k-mer. The uint64_t pair is always declared; the other pairs are
// each guarded by their own independent #ifdef, so more than one may be
// visible in the same build.
// NOTE(review): presumably returns the reverse complement of the encoded
// k-mer x; the two-argument form presumably takes the k-mer length in `size`,
// and the one-argument form presumably uses the global sizeKmer -- confirm.
uint64_t revcomp(uint64_t x);
uint64_t revcomp(uint64_t x, int size);

// Overloads for the LargeInt backend.
#ifdef _largeint
LargeInt<KMER_PRECISION> revcomp(LargeInt<KMER_PRECISION> x);
LargeInt<KMER_PRECISION> revcomp(LargeInt<KMER_PRECISION> x, int size);
#endif
// Overloads for the ttmath backend.
#ifdef _ttmath
ttmath::UInt<KMER_PRECISION> revcomp(ttmath::UInt<KMER_PRECISION> x);
ttmath::UInt<KMER_PRECISION> revcomp(ttmath::UInt<KMER_PRECISION> x, int size);
#endif
// Overloads for the compiler-provided 128-bit integer, declared only when
// _LP64 is defined.
#ifdef _LP64
__uint128_t revcomp(__uint128_t x);
__uint128_t revcomp(__uint128_t x, int size);
#endif

// Decoding routines (declarations only).
// NOTE(review): presumably code2seq writes the nucleotide string for `code`
// into the caller-provided buffer `seq`; the required buffer size and the
// meaning of the int return value are not visible here -- confirm.
int code2seq ( kmer_type code,char *seq);
int code2seq ( kmer_type code,char *seq, int sizeKmer, kmer_type kmerMask);
// NOTE(review): presumably returns the code of the nucleotide at index
// which_nucleotide within the encoded k-mer -- confirm indexing direction.
int code2nucleotide( kmer_type code, int which_nucleotide);

// Extract a k-mer from a read, in three overloads with progressively more
// explicit parameters (sequential flag, then sizeKmer/kmerMask).
// "graine" is French for "seed": graine and graine_revcomp are pointer
// parameters.
// NOTE(review): presumably they receive/carry the forward and
// reverse-complement encodings of the k-mer at `position` in readSeq, and the
// return value is presumably the smaller (canonical) of the two -- confirm.
// NOTE(review): `sequential` presumably enables incremental update from the
// previous position -- confirm.
kmer_type extractKmerFromRead(char *readSeq, int position, kmer_type *graine, kmer_type *graine_revcomp);
kmer_type extractKmerFromRead(char *readSeq, int position, kmer_type *graine, kmer_type *graine_revcomp, bool sequential);
kmer_type extractKmerFromRead(char *readSeq, int position, kmer_type *graine, kmer_type *graine_revcomp, bool sequential, int sizeKmer, kmer_type kmerMask);

// Compute the next k-mer obtained by appending nucleotide added_nt, working on
// either the forward or the reverse strand of graine (e.g. for ACTG, the
// reverse complement is CAGT).
// The returned value is always min(kmer, revcomp_kmer). On return, *strand
// indicates whether the returned value is the reverse-complemented k-mer.
// Examples (the third argument shows the value of *strand on entry):
//   next_kmer(ACTG, A, &0) = CTGA, with *strand = 0 on return
//       (because its revcomp is TCAG);
//   next_kmer(ACTG, A, &1) = AGTA, with *strand = 0 on return
//       (revcomp of ACTG is CAGT; CAGT + A gives AGTA, whose revcomp is TACT).
kmer_type next_kmer(kmer_type graine, int added_nt, int *strand);

// NOTE(review): presumably reverse-complements the first len characters of the
// nucleotide string s in place (s is non-const and nothing is returned) -- confirm.
void revcomp_sequence(char s[], int len);


// "_bin" variants of codeSeed, codeSeedRight, codeSeedRight_revcomp and
// extractKmerFromRead. Their signatures match the non-"_bin" overloads that
// take no sizeKmer/kmerMask, except that extractKmerFromRead_bin takes a
// use_compressed flag.
// NOTE(review): presumably these operate on reads stored in a binary
// (already integer-coded) form rather than ASCII nucleotides -- confirm.
kmer_type  codeSeed_bin(char *seq);

kmer_type  codeSeedRight_bin(char *seq, kmer_type  val_seed, bool new_read);

kmer_type  codeSeedRight_revcomp_bin(char *seq, kmer_type  val_seed, bool new_read);

kmer_type extractKmerFromRead_bin(char *readSeq, int position, kmer_type *graine, kmer_type *graine_revcomp, bool use_compressed);

// Debugging helpers that return a char* for the given k-mer.
// NOTE(review): ownership and lifetime of the returned buffer are not visible
// from this header (it may be a static or heap buffer) -- confirm before
// freeing it or holding it across calls.
char* print_kmer(kmer_type kmer); // debugging
char* print_kmer(kmer_type kmer, int sizeKmer, kmer_type kmerMask); // debugging


#endif