File: samLine.h

package info (click to toggle)
crac 2.5.2%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 6,996 kB
  • sloc: cpp: 41,838; sh: 391; makefile: 370
file content (341 lines) | stat: -rw-r--r-- 11,237 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
/******************************************************************************
*  Copyright © 2009-2016 -- LIRMM/CNRS                                        *
*                           (Laboratoire d'Informatique, de Robotique et de   *
*                           Microélectronique de Montpellier /                *
*                           Centre National de la Recherche Scientifique)     *
*                           LIFL/INRIA                                        *
*                           (Laboratoire d'Informatique Fondamentale de       *
*                           Lille / Institut National de Recherche en         *
*                           Informatique et Automatique)                      *
*                           LITIS                                             *
*                           (Laboratoire d'Informatique, du Traitement de     *
*                           l'Information et des Systèmes).                   *
*                                                                             *
*  Copyright © 2011-2016 -- IRB/INSERM                                        *
*                           (Institut de Recherches en Biothérapie /          *
*                           Institut National de la Santé et de la Recherche  *
*                           Médicale).                                        *
*                                                                             *
*  Copyright © 2015-2016 -- AxLR/SATT                                         *
*                           (Lanquedoc Roussilon /                            *
*                            Societe d'Acceleration de Transfert de           *
*                            Technologie).	                              *
*                                                                             *
*  Programmeurs/Programmers:                                                  *
*                    Nicolas PHILIPPE <nphilippe.resear@gmail.com>            * 
*                    Mikaël SALSON    <mikael.salson@lifl.fr>                 *
*                    Jérôme Audoux    <jerome.audoux@gmail.com>               *  
*   with additional contribution for the packaging of:	                      *
*                    Alban MANCHERON  <alban.mancheron@lirmm.fr>              *
*                                                                             *
*   Contact:         CRAC list   <crac-bugs@lists.gforge.inria.fr>            *
*   Paper:           CRAC: An integrated RNA-Seq read analysis                *
*                    Philippe N., Salson M., Commes T., Rivals E.             *
*                    Genome Biology 2013; 14:R30.                             *
*                                                                             *
*  -------------------------------------------------------------------------  *
*                                                                             *
*   This File is part of the CRAC program.                                    *
*                                                                             *
*   This program is free software: you can redistribute it and/or modify      *
*   it under the terms of the GNU General Public License as published by      *
*   the Free Software Foundation, either version 3 of the License, or (at     *
*   your option) any later version.  This program is distributed in the       *
*   hope that it will be useful, but WITHOUT ANY WARRANTY; without even       *
*   the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR       *
*   PURPOSE.  See the GNU General Public License for more details.  You       *
*   should have received a copy of the GNU General Public License along       *
*   with this program.  If not, see <http://www.gnu.org/licenses/>.           *
*                                                                             *
******************************************************************************/

#ifndef SAMLINE_H
#define SAMLINE_H
#include <string>
#include <map>
#include <ostream>
#include <vector>

#include "htslib/sam.h"
#include "htslib/kstring.h"

#include "Cigar.h"

// NOTE(review): this using-directive sits at header scope, so it applies to
// every translation unit that includes this file. The declarations below
// depend on it (unqualified string, map and ostream), so it cannot be removed
// without qualifying those names here and checking the includers.
using namespace std;

/*
 * Abstract base class for one optional field of a SAM/BAM record.
 *
 * It stores a two-character tag and a one-character type code. The value
 * itself lives in the concrete sub-classes, which have to implement clone(),
 * saveValueInBamRecord() and printValue().
 */
class OptionalField {
private:
  // Storage for exactly two characters: there is no room for a terminator.
  // NOTE(review): getTag() hands out a bare pointer; presumably callers must
  // read only two characters from it -- confirm against the .cpp.
  char tag[2];
  // One-character type code given to the constructor.
  char type;
public:

  // Builds a field from a tag and a type code. The array notation on
  // tag_name is documentation only: the parameter is a plain const char*.
  OptionalField(const char tag_name[2], char type);

  // Copy constructor.
  OptionalField(const OptionalField& o);

  // Polymorphic copy, implemented by each sub-class.
  // NOTE(review): presumably returns a heap-allocated copy owned by the
  // caller -- confirm in the .cpp.
  virtual OptionalField* clone() = 0; 

  // Virtual so that sub-class objects can be destroyed through a base pointer.
  virtual ~OptionalField() {}

  // Accessor for the tag (see the note on the member above).
  const char *getTag() const;

  // Accessor for the type code.
  char getType() const;

  // Non-virtual entry point that stores the field into str.
  // NOTE(review): presumably writes the tag and type and then delegates to
  // saveValueInBamRecord() -- confirm in the .cpp.
  void saveInBamRecord(kstring_t *str);

  // Sub-class hook: store the value of the field into str.
  virtual void saveValueInBamRecord(kstring_t *str) = 0;

  // Sub-class hook: print the value of the field on os and return a stream
  // reference.
  virtual std::ostream& printValue(std::ostream& os) const = 0;
};

// Stream insertion for any OptionalField.
// NOTE(review): the definition is not in this header; presumably it prints the
// tag and the type and then calls printValue() -- confirm in the .cpp.
std::ostream& operator<<(std::ostream& os, const OptionalField& of);

/*
 * Optional field whose value is a single character.
 */
class CharOptionalField : public OptionalField {
private:
  // The character carried by this field.
  char value;
public:
  // Builds the field from a tag and its character value.
  CharOptionalField(const char tag[2], char value);
  // Copy constructor.
  CharOptionalField(const CharOptionalField& o);
  ~CharOptionalField();
  // Implementation of the polymorphic copy declared in OptionalField.
  virtual OptionalField* clone(); 
  // Accessor for the stored character.
  char getValue() const;
  // Implementation of the OptionalField hook storing the value into str.
  virtual void saveValueInBamRecord(kstring_t *str);
  // Implementation of the OptionalField hook printing the value on os.
  virtual std::ostream& printValue(std::ostream& os) const;
};

/*
 * Optional field whose value is an integer.
 */
class IntOptionalField : public OptionalField {
private:
  // The integer carried by this field (signed).
  int value;
public:
  // Builds the field from a tag and a signed value.
  IntOptionalField(const char tag[2], int value);
  // Builds the field from a tag and an unsigned value.
  // NOTE(review): the member is a signed int, so an unsigned value above
  // INT_MAX cannot be represented as such -- confirm how the .cpp converts it.
  IntOptionalField(const char tag[2], uint value);
  // Copy constructor.
  IntOptionalField(const IntOptionalField& o);
  ~IntOptionalField();
  // Implementation of the polymorphic copy declared in OptionalField.
  virtual OptionalField* clone(); 
  // Accessor for the stored integer.
  int getValue() const;
  // Implementation of the OptionalField hook storing the value into str.
  virtual void saveValueInBamRecord(kstring_t *str);
  // Implementation of the OptionalField hook printing the value on os.
  virtual std::ostream& printValue(std::ostream& os) const;
};

/*
 * Optional field whose value is a character string.
 */
class StringOptionalField : public OptionalField {
private:
  // The string carried by this field.
  string value;
public:
  // Builds the field from a tag and a string (taken by value).
  StringOptionalField(const char tag[2], string value);
  // Builds the field from a tag and a C string.
  // NOTE(review): the pointer is not const-qualified, so string literals and
  // const char* arguments do not select this overload -- confirm whether the
  // .cpp really needs a mutable pointer.
  StringOptionalField(const char tag[2], char* value);
  // Copy constructor.
  StringOptionalField(const StringOptionalField& o);
  ~StringOptionalField();
  // Implementation of the polymorphic copy declared in OptionalField.
  virtual OptionalField* clone(); 
  // Read-only access to the stored string.
  const string& getValue() const;
  // Implementation of the OptionalField hook storing the value into str.
  virtual void saveValueInBamRecord(kstring_t *str);
  // Implementation of the OptionalField hook printing the value on os.
  virtual std::ostream& printValue(std::ostream& os) const;
};

// TODO Implement an IntArray optional field sub-class
// This could be a good way to store p_support and p_loc fields
//class IntArrayOptionalField : public OptionalField {
//  private:
//    int *value;
//public:
//
//  IntArrayOptionalField(const char *tag, int* value) 
//  : OptionalField(tag,'B')
//  {
//    this->value = value; 
//  };
//
//  virtual ~IntArrayOptionalField();
//
//};

// Ordered map from a string key to a pointer on a polymorphic OptionalField.
// NOTE(review): presumably the key is the two-character tag and the owner of
// the map is responsible for deleting the pointed objects -- confirm in the
// .cpp.
typedef map<string, OptionalField* > OF_map; 

/*
 * One alignment line of a SAM file.
 *
 * The class holds the SAM columns (QNAME, FLAG, RNAME, POS, MAPQ, CIGAR,
 * RNEXT, PNEXT, TLEN, SEQ, QUAL) plus a set of optional fields, and can be
 * written either on a text stream (writeLine) or through htslib
 * (writeBamRecord).
 *
 * NOTE(review): a copy constructor and a destructor are declared but no
 * copy-assignment operator. If optionalFields owns the objects it points to,
 * the implicitly generated operator= would make two SamLine share them --
 * confirm in the .cpp.
 */
class SamLine {
private:
  //GenomeInfo *genome;
  // One member per SAM column; each one is described next to its accessors
  // in the public section.
  string rname;
  string qname;
  string rnext;
  string seq;
  string qual;
  int rid;
  int ridnext;
  uint flag;
  uint pos;
  uint mapQ;
  uint pnext;
  int tlen;
  Cigar cigar;
  //map<string, string> optionalFields;
  // Optional fields, stored as pointers on polymorphic OptionalField objects.
  OF_map optionalFields;

  // Helpers shared by the public flag accessors below.
  // NOTE(review): presumably `bit` is a mask such as 0x10 rather than a bit
  // index -- confirm in the .cpp.
  bool isFlagBitSet(uint bit) const;
  void setFlagBit(uint bit);
  void unsetFlagBit(uint bit);

public:
  /*
   * Default constructor that initializes all fields to their default values.
   */
  SamLine();

  /*
   * Copy constructor.
   */
  SamLine(const SamLine& s);

  // Destructor
  ~SamLine();

  // Query template NAME
  void setQname(const string& qname);
  const string& getQname() const;

  // Bitwise FLAG (whole value at once)
  void setFlag(uint flag);
  uint getFlag() const;

  // Flag operations: one is/set/unset triple per flag value.
  // Value  Description
  //   0x1 template having multiple segments in sequencing
  //   0x2 each segment properly aligned according to the aligner
  //   0x4 segment unmapped
  //   0x8 next segment in the template unmapped
  //   0x10 SEQ being reverse complemented
  //   0x20 SEQ of the next segment in the template being reversed
  //   0x40 the first segment in the template
  //   0x80 the last segment in the template
  //   0x100 secondary alignment
  //   0x200 not passing quality controls
  //   0x400 PCR or optical duplicate
  //   0x800 supplementary alignment
  // The "value N" comments below give the decimal value of each flag.

  // value 1 (0x1)
  bool isTemplateHavingMultipleSegments() const;
  void setTemplateHavingMultipleSegments();
  void unsetTemplateHavingMultipleSegments();

  // value 2 (0x2)
  bool isEachSegmentsMapped() const;
  void setEachSegmentsMapped();
  void unsetEachSegmentsMapped();

  // value 4 (0x4)
  bool isSegmentUnmapped() const;
  void setSegmentUnmapped();
  void unsetSegmentUnmapped();

  // value 8 (0x8)
  bool isNextSegmentUnmapped() const;
  void setNextSegmentUnmapped();
  void unsetNextSegmentUnmapped();

  // value 16 (0x10)
  bool isSeqReverseComplemented() const;
  void setSeqReverseComplemented();
  void unsetSeqReverseComplemented();

  // value 32 (0x20)
  bool isNextSeqReverseComplemented() const;
  void setNextSeqReverseComplemented();
  void unsetNextSeqReverseComplemented();

  // value 64 (0x40)
  bool isFirstSegmentInTheTemplate() const;
  void setFirstSegmentInTheTemplate();
  void unsetFirstSegmentInTheTemplate();

  // value 128 (0x80)
  bool isLastSegmentInTheTemplate() const;
  void setLastSegmentInTheTemplate();
  void unsetLastSegmentInTheTemplate();

  // value 256 (0x100)
  bool isSecondaryAlignement() const;
  void setSecondaryAlignement();
  void unsetSecondaryAlignement();

  // value 512 (0x200)
  bool isFailingQualityControl() const;
  void setFailingQualityControl();
  void unsetFailingQualityControl();

  // value 1024 (0x400)
  bool isPCRDuplicated() const;
  void setPCRDuplicated();
  void unsetPCRDuplicated();

  // value 2048 (0x800): listed as "supplementary alignment" in the table
  // above, named "chimeric" in this API.
  bool isChimericAlignement() const;
  void setChimericAlignement();
  void unsetChimericAlignement();

  // Reference sequence NAME
  // The rid corresponds to the ID given for this reference sequence in
  // the SAM headers
  void setRname(const string& rname, const int rid);
  void setUnknownRname();
  const string& getRname() const;
  int getRid() const;

  // 1-based leftmost mapping POSition
  void setPos(uint pos);
  void setUnknownPos();
  uint getPos() const;

  // MAPping Quality
  void setMapQ(uint mapQ); 
  uint getMapQ() const;

  // CIGAR string
  void setCigar(const Cigar& cigar);
  const Cigar& getCigar() const;

  // Ref. name of the mate/next segment, given together with its id
  // NOTE(review): presumably ridnext is the SAM header ID of rnext, like rid
  // for rname -- confirm.
  void setRnext(const string& rnext, const int ridnext);
  const string& getRnext() const;
  int getRidnext() const;

  // Position of the mate/next segment
  void setPnext(uint pnext);
  uint getPnext() const;

  // observed Template LENgth
  void setTlen(int tlen);
  int getTlen() const;

  // segment SEQuence
  void setSeq(const string& seq);
  const string& getSeq() const;
  void reverseComplementeSeq();

  // ASCII of Phred-scaled base QUALity+33
  void setQual(const string& qual);
  const string& getQual() const;
  void reverseQual();

  // Optional fields: one overload per supported value type (char, unsigned
  // and signed integer, std::string and C string).
  // NOTE(review): what happens when the tag is already present is decided in
  // the .cpp -- confirm before relying on it.
  void addOptionalField(const char tag[], char val);
  void addOptionalField(const char tag[], uint val);
  void addOptionalField(const char tag[], int val);
  void addOptionalField(const char *tag, const string& val);
  void addOptionalField(const char *tag, const char* val);

  // How could we re-implement this method with the new
  // abstract class OptionalField?
  //const string& getOptionalField(const char *tag);
  // Read-only access to the whole set of optional fields.
  const OF_map& getOptionalFields() const;
  // NOTE(review): this query is not declared const, so it cannot be called
  // on a const SamLine.
  bool isOptionalFieldDefined(const char *tag);

  // Remove all fields that are user reserved.
  // That means the tag used matches: X?, Y? or Z?
  void removeAllUserOptionalFields();

  // Write the SAM line on the given stream; a stream reference is returned.
  ostream &writeLine(ostream &os);

  // Write the line as a record through htslib, using the header h.
  // NOTE(review): the meaning of the returned int is not visible here;
  // presumably it is the htslib status code -- confirm in the .cpp.
  int writeBamRecord(samFile *out, const bam_hdr_t *h);

private:
};

#endif