File: mapfrag.h

package info (click to toggle)
segemehl 0.3.4-5
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 2,024 kB
  • sloc: ansic: 35,270; makefile: 43; sh: 37
file content (223 lines) | stat: -rw-r--r-- 8,528 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
/*
 *   segemehl - a read aligner
 *   Copyright (C) 2008-2017  Steve Hoffmann and Christian Otto
 *
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */


#ifndef MAPFRAG_H
#define MAPFRAG_H

/*
 *
 *	mapfrag.h
 *  
 * 
 *  @author Steve Hoffmann, steve@bioinf.uni-leipzig.de
 *  @company Bioinformatics, University of Leipzig 
 *  @date 10.12.2012 10:25:35 CET  
 *
 */


#include <string.h>
#include <limits.h>

#include "basic-types.h"
#include "locus.h"
#include "multicharseq.h"
#include "sufarray.h"
#include "kdseed.h"
#include "biofiles.h"
#include "segemehl.h"

/*
 * mapseed_t - record describing one seed.
 *
 * NOTE(review): this header only shows the layout. Apart from the two remarks
 * carried over from the original author (strand, "not always set"), the notes
 * below are read off the member names -- confirm them against the code that
 * fills the struct before relying on them.
 */
typedef struct mapseed_s {
  uint64_t u;
  char rc; // strand; flagged TODO by the original author
  double evalue;
  double score;
  unsigned int len;
  /* presumably match / mismatch / insertion / deletion counters -- TODO confirm */
  unsigned int mat;
  unsigned int mis;
  unsigned int ins;
  unsigned int del;
  /* presumably the edit distance -- TODO confirm */
  unsigned int edist;
  /* presumably left / right boundaries; of what interval is not visible here */
  unsigned int l;
  unsigned int r;
  /* char-sized members; they look like flags, value range not visible here */
  char good;
  char maxinterval;
  char maxevalue;
  locus_t *locus;
  /* the members below are not always set */
  uint64_t seedlen;
  uint64_t refidx;
  uint64_t refpos;
  char *refname;
} mapseed_t;

/*
 * mapseedlist_t - pairs a counter with a pointer to mapseed_t records.
 *
 * NOTE(review): n presumably is the number of records reachable through l;
 * who allocates and frees l is not visible in this header -- confirm.
 */
typedef struct mapseedlist_s {
  unsigned int n;
  mapseed_t *l;
} mapseedlist_t;

/*
 * mapfrag_t - one fragment of a mapping: a seed pointer, sequence and quality
 * strings, a MultiCharSeqAlignment pointer and a set of counters and flags.
 *
 * NOTE(review): only the layout is visible here. bl_addMapFrag() takes
 * parameters named mcsa, seed, mate and issplit, which match members of this
 * struct; everything else below is inferred from names -- confirm.
 */
typedef struct mapfrag_s {
  mapseed_t *seed;
  char *seq;
  char *qual;
  MultiCharSeqAlignment *mcsa;
  /* presumably left / right clipping lengths -- TODO confirm */
  unsigned int lclip;
  unsigned int rclip;
  /* presumably match / mismatch / insertion / deletion counters -- TODO confirm */
  unsigned int mat;
  unsigned int mis;
  unsigned int ins;
  unsigned int del;
  /* possibly the longest match (cf. bl_getMapFragLongestMatch) -- TODO confirm */
  unsigned int lmat;
  unsigned char mate;
  unsigned char mapq;
  int leftgap;
  int rightgap;
  /* unsigned-char members that look like flags; value range not visible here */
  unsigned char nextnoncollinear;
  unsigned char prevnoncollinear;
  unsigned char issplit;
  /* double-typed counterpart of mapq; how the two relate is not visible here */
  double mapq_dbl;
} mapfrag_t;

/*
 * mapping_t - one mapping: sequence and quality strings, clipping values, a
 * score, a pointer to mapfrag_t records and three quality-related doubles.
 *
 * NOTE(review): only the layout is visible here. n presumably counts the
 * fragments reachable through f (cf. bl_getMapFrags, which returns a
 * mapfrag_t* together with a size); the meaning of P, Q, sigma and
 * matestatus is not visible in this header -- confirm.
 */
typedef struct mapping_s {
  char *seq;
  char *qual;
  Uint seqlen;
  Uint lclip;
  Uint rclip;
  int scr;
  unsigned int n;
  unsigned int P;
  unsigned int Q;
  char consecutive;
  char matestatus;
  mapfrag_t *f;
  double mapqual;
  double sigma;
  double maxqual;
} mapping_t;

/*
 * mappingset_t - pairs a counter with a pointer to mapping_t records.
 *
 * NOTE(review): n presumably is the number of mappings reachable through
 * elem; ownership of elem is not visible in this header -- confirm.
 */
typedef struct mappingset_s {
  unsigned int n;
  mapping_t* elem;
} mappingset_t;

/*
 * Mapping / mapping-set maintenance and seed-list construction.
 *
 * NOTE(review): only prototypes are visible in this header. The remarks
 * below are limited to what the signatures show; behaviour and ownership
 * must be confirmed against the implementation.
 */
void bl_removeMapping(mapping_t *l);
void bl_removeMappingSet(mappingset_t *s);
void bl_removeUnpairedMapping(mappingset_t *s);
char bl_mappingsetHasPaired(mappingset_t *s);

/* Both take two mappings, named l and r. */
long long int bl_distMapping(mapping_t *l, mapping_t *r);
mapping_t *bl_concatMapping(mapping_t *l, mapping_t *r);

void bl_initMappingSet(mappingset_t *set);
void bl_addMapping(mappingset_t *s, mapping_t *l);

/* mcsa, seed, mate and issplit share their names with mapfrag_t members. */
Uint bl_addMapFrag(mapping_t *l,
                   MultiCharSeqAlignment *mcsa,
                   mapseed_t *seed,
                   unsigned char mate,
                   unsigned char issplit);

/* Takes a seed list and returns a seed-list pointer. */
mapseedlist_t *bl_addMapSeedBranch(mapseedlist_t *s,
                                   branch_t *b,
                                   unsigned int u,
                                   unsigned int strand,
                                   double maxE,
                                   unsigned int maxM,
                                   double SPM,
                                   karlin_t *stats);

unsigned int bl_mappingsetHasPos(mappingset_t *s, unsigned int pos);
void bl_removeSuboptimalMapping(mappingset_t *s, int *scores, int indel);
/*
 * bl_getMapFrag* family: each takes a single mapfrag_t pointer.
 *
 * NOTE(review): whether the returned char* / Alignment* values are owned by
 * the fragment or by the caller is not visible in this header -- confirm.
 */
char *bl_getMapFragQryDesc(mapfrag_t *f);
char *bl_getMapFragQry(mapfrag_t *f);
char *bl_getMapFragQual(mapfrag_t *f);
unsigned char bl_getMapFragStrand(mapfrag_t *f);
unsigned int bl_getMapFragV(mapfrag_t *f);
unsigned int bl_getMapFragU(mapfrag_t *f);
unsigned int bl_getMapFragP(mapfrag_t *f);
unsigned int bl_getMapFragQ(mapfrag_t *f);
char *bl_getMapFragRefDesc(mapfrag_t *f);
Alignment *bl_getMapFragAlignment(mapfrag_t *f);

/* Same name stem, once for a whole mapping and once for a single fragment. */
unsigned int bl_getMappingEdist(mapping_t *l);
unsigned int bl_getMapFragEdist(mapfrag_t *f);

/* Takes a seed list and returns a seed-list pointer. */
mapseedlist_t *bl_initMapSeedList(mapseedlist_t *l);
/*
 * Seed-list helpers, mapping initialisation and assorted predicates/getters.
 *
 * NOTE(review): only prototypes are visible in this header; behaviour and
 * ownership must be confirmed against the implementation.
 */
void bl_wrapSeedList(mapseedlist_t *l);

/* The original prototype leaves the last four parameters unnamed. */
void bl_initMapping(mapping_t *l, char *, char *, Uint, Uint);

MultiCharSeqAlignment *bl_copyMCSA(MultiCharSeqAlignment *dest,
                                   MultiCharSeqAlignment *src);

/* char-returning predicates on one mapping ... */
char bl_isPairedMapping(mapping_t *l);
char bl_isQueryMapping(mapping_t *l);
char bl_isMateMapping(mapping_t *l);

/* ... and on a whole mapping set. */
char bl_hasMateMapping(mappingset_t *s);
char bl_hasQueryMapping(mappingset_t *s);

/* Further bl_getMapFrag* getters, each taking a single fragment. */
unsigned int bl_getMapFragChrIdx(mapfrag_t *f);
unsigned int bl_getMapFragSplit(mapfrag_t *f);
unsigned char bl_getMapFragIsMate(mapfrag_t *f);
MultiCharSeqAlignment *bl_getMapFragMCSA(mapfrag_t *f);

Uint bl_getMateStartIdx(mapping_t *l);
Uint bl_getQueryStartIdx(mapping_t *l);

/* Seed-level helpers. */
mapseed_t *bl_getMapSeedListBest(mapseedlist_t *l);
locus_t *bl_getMapSeedLocus(mapseed_t *seed,
                            Uint j,
                            MultiCharSeq *mseq,
                            Suffixarray *s);
void bl_getMapSeedAdditionalInformation(mapseed_t *seed,
                                        Suffixarray *s,
                                        MultiCharSeq *mseq);
void bl_addMapFragPartialEdist (mapfrag_t *frag, Uint edist);
unsigned int bl_getMapFragGetUlen (mapfrag_t *f);
uint64_t bl_getMapFragSubstart (mapfrag_t *f);
void bl_removeBadMappings (mappingset_t *s, Uint querylen, Uint matelen, Uint acc);
void bl_removeBadMates (mappingset_t *s, Uint querylen, Uint matelen, Uint acc);
void bl_countMultipleMappings (mappingset_t *s, Uint *nqueries, Uint *nmates);
unsigned int bl_getMappingEdistQM (mapping_t *l, Uint *qedist, Uint *medist);
void bl_setMapFragMapQual (mapfrag_t *f, double mapqual);
double bl_getMapFragMapQual (mapfrag_t *f);
unsigned int bl_getMapFragLongestMatch(mapfrag_t *f);
mapfrag_t* bl_getMapFrags (mapping_t *m, Uint *size);
mapping_t* bl_getMappings(mappingset_t *s, Uint *size);
unsigned int bl_getMapFragScore (mapfrag_t *f, int *scores, int indel);
void bl_removeMappingQM (mapping_t *l, char ismate);
Uint bl_getNextMapFragU (mapfrag_t *f, mapping_t *m, char ismate, Uint *lgap, Uint *rgap);
int bl_getMappingScore (mapping_t *l, int * scores, int indel);
mappingset_t* bl_copyMappingSet(mappingset_t *s);
locuslist_t* bl_getMappingLocusList(mapping_t *mapping, MultiCharSeq* mseq, char ismate);
char bl_isSplitMappingQM (mapping_t *mapping, char ismate);
char bl_getMappingStrandQM (mapping_t *mapping, char ismate);
char bl_isCollinearMapping(mapping_t *m, char ismate);
mapfrag_t* bl_getMapFragsQM (mapping_t *m, Uint *size, char ismate);
unsigned int bl_getMapFragLeft(mapfrag_t *f);
unsigned int bl_getMapFragRight(mapfrag_t *f);
int bl_getMappingScoreQM (mapping_t *l, int *scores, int indel, int *qscore, int *mscore);
int bl_getMappingFragNoQM (mapping_t *l, Uint *qno, Uint *mno);
char bl_hasQueryMappingMaxEdist (mappingset_t *s, Uint maxedist);
char bl_hasMateMappingMaxEdist (mappingset_t *s, Uint maxedist);
Uint bl_getMappingRange(mapping_t *m);
char bl_getMappingIsConsecutive(mapping_t *m);
void bl_dumpMappingSet (FILE *dev, mappingset_t *set);
int bl_getMappingMaxScore(mappingset_t *s, int *scores, int indel);
void bl_removeBadMatesEdistQM (mappingset_t *s, Uint maxedist, Uint ismate);
mappingset_t* bl_sortMappingSetByScore (mappingset_t *set, int* scores, int indel);
void bl_removeBadMatesAcc (mappingset_t *s, Uint querylen, Uint matelen, Uint acc); 
int bl_getMappingMaxScoreQM(mappingset_t *s, int *scores, int indel, char ismate);
void bl_removeSuboptimalMappingQM (mappingset_t *s, int *scores, int indel, char ismate);
void bl_mergeMappings (mappingset_t *set);
void bl_removeBadMatesCov (mappingset_t *s, Uint querylen, Uint matelen, Uint mc);
void bl_concatMappingSet (mappingset_t *dest, mappingset_t*source);
char bl_hasMappingPairedMaxEdist (mappingset_t *s, Uint maxedist);
mappingset_t* bl_mappingsetRemoveDuplicates(mappingset_t *set, Uint maxdist);
void bl_dumpMapping (FILE *dev, mapping_t *m);
mapping_t* bl_copyMapping (mapping_t* dest, mapping_t* source);
char bl_hasMultipleQueryMappings (mappingset_t *s);
char bl_hasMultipleMateMappings (mappingset_t *s);
char bl_hasMultiplePairedMappings (mappingset_t *s);
char bl_isCircularMappingQM (mapping_t *m, char ismate);
void bl_dumpSpliceJunctions(mapping_t *m, char ismate, MultiCharSeq *mseq, char *basename, char *name, segemehl_t *nfo);
char bl_mappingGetType (mapping_t *m, char ismate);
Uint  bl_getMappingMinFragDist (mapping_t *l);
char bl_getMapFragIsChimeric(mapfrag_t *f);
char bl_getMappingIsChimericQM (mapping_t *m, char ismate);
void bl_removeBadMatesScr (mappingset_t *s, Uint querylen, Uint matelen, int *scores, int indel, int minscr, int minmatescr);
char bl_mappingHasPairedEndOrientation(mapping_t *mapping);
#endif