File: SeedPattern.h

package info (click to toggle)
perm 0.4.0-8
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 976 kB
  • sloc: cpp: 13,499; makefile: 98; sh: 12
file content (95 lines) | stat: -rw-r--r-- 4,852 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#pragma once
#include "bitsOperationUtil.h"
#include "ReadInBits.h"
#define EXTEND_SEED true

/*
 * This file declares the seed functions, which generate the hash value and the key for a short DNA read according to a seed.
 * A seed consists of a repeated pattern of selected (cared) positions.
 * The F2 seed consists of the repeated pattern (111*1**), which is fully sensitive to two mismatches.
 * The S1_1 seed consists of the repeated pattern (1111**1***), which is fully sensitive to three mismatches.
 * The S2_0 seed consists of the repeated pattern (1111**1****), which is fully sensitive to two pairs of consecutive mismatches.
 * The F3 seed consists of the repeated pattern (111*1**1***), which is fully sensitive to three mismatches.
 * The F4 seed consists of the repeated pattern (11***1****), which is fully sensitive to four mismatches.
*/
// Pointer to a hash function: takes a read (CReadInBits, passed by value) and returns an unsigned int.
// This is the return type of the select*() functions declared in this header.
typedef unsigned int(*ptHashFunc)(CReadInBits);

// Number of bits used for hashing (currently 13).
// NOTE(review): presumably the get*SeedHashValue functions produce values that fit in
// this many bits -- confirm against the implementation.
const unsigned int BITS_FOR_HASHING = 13;

// F0: the seed that is fully sensitive to exact-match alignments.
//   getF0SeedHashValue(r)      - hash value of read r under this seed
//   getF0SeedKey(r, keyWeight) - key of read r under this seed
//   selectF0(readlength)       - returns a hash-function pointer for the given read length
// NOTE(review): keyWeight is presumably the number of cared positions used to build the
// key -- confirm against the implementation.
unsigned int getF0SeedHashValue(CReadInBits r);
unsigned int getF0SeedKey(CReadInBits r, int keyWeight);
ptHashFunc selectF0(int readlength);

// F1: the seed that is fully sensitive to alignments with one mismatch.
// Hash-value functions take the read by value and return an unsigned int.
unsigned int getF1SeedHashValue(CReadInBits r);
// NOTE(review): the meaning of the "15" suffix is not visible in this header -- confirm
// against the implementation.
unsigned int getF1SeedHashValue15(CReadInBits r);
// Key of read r under the F1 seed; keyWeight is presumably the number of cared positions
// used for the key -- confirm.
unsigned int getF1SeedKey(CReadInBits r, int keyWeight);
// Returns a hash-function pointer for the given read length.
ptHashFunc selectF1(int readlength);

// F2: the seed that is fully sensitive to alignments with two mismatches.
unsigned int getF2SeedHashValue(CReadInBits r);
// Hash-value variants named after the read-length range they target (25-27 and 23-24).
unsigned int getF2SeedHashValue4ReadLength25_27(CReadInBits r);
unsigned int getF2SeedHashValue4ReadLength23_24(CReadInBits r);
// Key of read r under the F2 seed; keyWeight is presumably the number of cared positions
// used for the key -- confirm.
unsigned int getF2SeedKey(CReadInBits r, int keyWeight);
// Returns a hash-function pointer for the given read length
// (presumably one of the variants above -- confirm).
ptHashFunc selectF2(int readlength);

// S1_1: the seed that is fully sensitive to alignments with
// two consecutive mismatches plus one random mismatch (for SOLiD reads).
unsigned int getS1_1SeedHashValue(CReadInBits r);
// Hash-value variants for different read lengths
unsigned int getS1_1SeedHashValue4ReadLength31(CReadInBits r); // gets the first 12 cared positions
unsigned int getS1_1SeedHashValue4ReadLength30(CReadInBits r); // gets the first 11 cared positions
unsigned int getS1_1SeedHashValue4ReadLength26_29(CReadInBits r); // gets the first 10 cared positions
unsigned int getS1_1SeedHashValue4ReadLength23_25(CReadInBits r); // gets the first 9 cared positions
// Key of read r under the S1_1 seed; keyWeight is presumably the number of cared positions
// used for the key -- confirm.
unsigned int getS1_1SeedKey(CReadInBits r, int keyWeight);
// Returns a hash-function pointer for the given read length
// (presumably one of the variants above -- confirm).
ptHashFunc selectS1_1(int readlength);

// S2_0: the seed that is fully sensitive to alignments with two pairs of consecutive mismatches.
unsigned int getS2_0SeedHashValue(CReadInBits r);
// Hash-value variants named after the read length (or read-length range) they target.
unsigned int getS2_0SeedHashValue4ReadLength34(CReadInBits r);
unsigned int getS2_0SeedHashValue4ReadLength33(CReadInBits r);
unsigned int getS2_0SeedHashValue4ReadLength28_32(CReadInBits r);
unsigned int getS2_0SeedHashValue4ReadLength25_27(CReadInBits r);
// Key functions: the general one and a variant named for read length 34.
// keyWeight is presumably the number of cared positions used for the key -- confirm.
unsigned int getS2_0SeedKey(CReadInBits r, int keyWeight);
unsigned int getS2_0SeedKey4ReadLength34(CReadInBits r, int keyWeight);
// Returns a hash-function pointer for the given read length
// (presumably one of the variants above -- confirm).
ptHashFunc selectS2_0(int readlength);


// F3: the seed that is fully sensitive to alignments with any three random mismatches.
unsigned int getF3SeedHashValue(CReadInBits r);
// Hash-value variants named after the read length (or read-length range) they target.
unsigned int getF3SeedHashValue4ReadLength34(CReadInBits r);
unsigned int getF3SeedHashValue4ReadLength33(CReadInBits r);
unsigned int getF3SeedHashValue4ReadLength29_32(CReadInBits r);
unsigned int getF3SeedHashValue4ReadLength26_28(CReadInBits r);
unsigned int getF3SeedHashValue4ReadLength25(CReadInBits r);
// Key functions: the general one plus variants named for read lengths 34 and 32.
// keyWeight is presumably the number of cared positions used for the key -- confirm.
unsigned int getF3SeedKey(CReadInBits r, int keyWeight);
unsigned int getF3SeedKey4ReadLength34(CReadInBits r, int keyWeight);
unsigned int getF3SeedKey4ReadLength32(CReadInBits r, int keyWeight);
// Returns a hash-function pointer for the given read length
// (presumably one of the variants above -- confirm).
ptHashFunc selectF3(int readlength);

// S1_2: the seed that is fully sensitive to alignments with one pair of consecutive
// mismatches plus two random mismatches.
// Only a general hash-value function and a key function named for read lengths 46-49
// are declared here; this header declares no select function for this seed.
unsigned int getS1_2SeedHashValue(CReadInBits r);
unsigned int getS1_2SeedKey4ReadLength46_49(CReadInBits r, int keyWeight);

// F4: the seed that is fully sensitive to alignments with four random mismatches.
unsigned int getF4SeedHashValue(CReadInBits r);
// Hash-value variants named after the read length (or read-length range) they target.
unsigned int getF4SeedHashValue4ReadLength41(CReadInBits r);
unsigned int getF4SeedHashValue4ReadLength40(CReadInBits r);
unsigned int getF4SeedHashValue4ReadLength35_39(CReadInBits r);
unsigned int getF4SeedHashValue4ReadLength31_34(CReadInBits r);
// Key function named for read lengths 45-49. Unlike the other key functions in this
// header it takes no keyWeight argument, so it also matches the ptHashFunc signature.
unsigned int getF4SeedKey4ReadLength45_49(CReadInBits r);
// Returns a hash-function pointer for the given read length
// (presumably one of the functions above -- confirm).
ptHashFunc selectF4(int readlength);

// Placeholder key function: ignores both arguments and always yields 0.
// Its (CReadInBits, int) signature matches the get*SeedKey functions declared above.
inline unsigned int returnDummyHashKey(CReadInBits r, int keyWeight)
{
    // Neither parameter is used; the casts only mark them as intentionally unused.
    (void)r;
    (void)keyWeight;
    return 0u;
}

// Returns the weight of a seed (its number of cared positions), given the repeated seed
// pattern (e.g. "111*1**") and the read length.
unsigned int getNoOfCaredPositions(const char* SeedRepeat, unsigned int uiReadLength);
// Returns the number of cared positions for the periodic seed pattern caSeedRepeat.
// NOTE(review): the original comment was cut off ("if the periodic"); judging by the
// function name this counts cared positions when the pattern is repeated over the full
// read of length uiReadLength -- confirm against the implementation.
unsigned int getNoOfCaredPositions4FullRead(const char* caSeedRepeat, unsigned int uiReadLength);