File: DNASequence.hpp

package info (click to toggle)
pbseqlib 0~20161219-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 5,924 kB
  • ctags: 5,123
  • sloc: cpp: 82,727; makefile: 305; python: 239; sh: 8
file content (184 lines) | stat: -rw-r--r-- 4,653 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#ifndef  _BLASR_DNA_SEQUENCE_HPP_
#define  _BLASR_DNA_SEQUENCE_HPP_

#include <stdint.h>
#include <stdlib.h>
#include <iostream>
#include <string>
#include <cassert>
#include "Types.h"
#include "NucConversion.hpp"
#include "utils.hpp"
#include "libconfig.h"

#ifdef USE_PBBAM
#include <pbbam/BamRecord.h>
#endif


class DNASequence {
public:
    DNALength length;
    Nucleotide *seq;
    int bitsPerNuc;
    bool deleteOnExit;

    inline DNASequence();
    inline ~DNASequence();

    //--- functions ---//
    
    DNALength size();

    inline void CheckBeforeCopyOrReference(const DNASequence & rhs, std::string seqType = "DNASequence"); 

    void TakeOwnership(DNASequence &rhs);

    void Append(const DNASequence &rhs, DNALength appendPos=0);

    DNASequence &Copy(const DNASequence &rhs, DNALength rhsPos=0, DNALength rhsLength=0);

    void ShallowCopy(const DNASequence &rhs);

    DNASequence & Copy(const std::string & rhs);

    int GetStorageSize() const;

    DNASequence &operator=(const DNASequence &rhs);

    DNASequence &operator=(const std::string &rhs);

    void Print(std::ostream &out, int lineLength = 50) const;

    void PrintSeq(std::ostream &out, int lineLength = 50) const;

    void Allocate(DNALength plength);

    void ReferenceSubstring(const DNASequence &rhs, DNALength pos=0, DNALength substrLength=0); 

    DNALength MakeRCCoordinate(DNALength forPos );

    inline void CopyAsRC(DNASequence &rc, DNALength pos=0, DNALength rcLength =0);

    void MakeRC(DNASequence &rc, DNALength pos=0, DNALength rcLength=0);

    void ToTwoBit();

    inline void ToThreeBit();

    void ToFourBit();

    void ConvertThreeBitToAscii();

    void ToAscii();

    void Assign(DNASequence &ref, DNALength start=0, DNALength plength=0);

    void ToLower();

    void ToUpper(); 

    void Concatenate(const Nucleotide *moreSeq, DNALength moreSeqLength); 

    std::string GetTitle() const; 

    void Concatenate(const Nucleotide* moreSeq);

    void Concatenate(DNASequence &seq); 

    int Compare(DNALength pos, DNASequence &rhs, DNALength rhsPos, DNALength length); 

    int LessThanEqual(DNALength pos, DNASequence &rhs, DNALength rhsPos, DNALength length); 

    int Equals(DNASequence &rhs, DNALength rhsPos, DNALength length, DNALength pos=0 ); 

    int LessThan(DNALength pos,  DNASequence &rhs, DNALength rhsPos, DNALength length); 

    void CleanupASCII(); 

    Nucleotide operator[](int i) {
        return seq[i];
    }

    Nucleotide GetNuc(DNALength i) const; 

    DNALength GetRepeatContent() const; 

    void CleanupOnFree();

    virtual void Free(); 

    void Resize(DNALength newLength);

    DNALength GetSeqStorage() const;

#ifdef USE_PBBAM
    /// Copies a BamRecord as a DNASequence.
    DNASequence & Copy(const PacBio::BAM::BamRecord & record);
#endif 
};

inline DNASequence::DNASequence() {
    seq = NULL;
    length = 0;
    bitsPerNuc = 8;
    deleteOnExit = false;
}

inline DNASequence::~DNASequence() {
    DNASequence::Free();
}


// Sanity check:
// If this DNASequence and rhs are pointing to the same seq 
// in memory, make sure this DNASequence's deleteOnExit is false.
// (otherwise, seq in memory will be deleted before being copied)
inline void DNASequence::CheckBeforeCopyOrReference(const DNASequence & rhs, std::string seqType) {

    if (seq == rhs.seq and seq != NULL and deleteOnExit) {
        std::cout << "ERROR, trying to copying a " << seqType << " to itself." << std::endl;
        exit(1);
    }
}

inline void DNASequence::ToThreeBit() {
    DNALength i;
    if (bitsPerNuc != 3) 
        for (i = 0; i < length; i++) { seq[i] = ThreeBit[seq[i]]; }
    bitsPerNuc = 3;
}

inline void DNASequence::CopyAsRC(DNASequence &rc, DNALength pos, DNALength rcLength) {
    // Free rc before copying any data to rc.
    ((DNASequence&)rc).Free();
    //
    // Different way of accounting for position. The position is on
    // the rc strand, not the forward strand.
    //
    if (rcLength == 0) {
        rcLength = length - pos;
    }
    DNALength rcStart = length - (pos + rcLength);
    ((DNASequence&)rc).Resize(rcLength);
    DNALength i;
    for (i = 0; i < rcLength; i++) {
        rc.seq[i] = ReverseComplementNuc[seq[rcStart - 1 + (rcLength - i)]];
    }

    // The reverse complement controls its own memory now.
    rc.deleteOnExit = true;
}


template<typename T>
DNALength ResizeSequence(T &dnaseq, DNALength newLength) {
    assert(newLength > 0);
    ((T&)dnaseq).Free();
    dnaseq.seq = ProtectedNew<Nucleotide>(newLength);
    dnaseq.length = newLength;
    dnaseq.deleteOnExit = true;
    return newLength;
}

#endif // _BLASR_DNA_SEQUENCE_HPP_