File: PackedDNASequence.hpp

package info (click to toggle)
pbseqlib 5.3.4+dfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye
  • size: 7,020 kB
  • sloc: cpp: 77,246; python: 331; sh: 103; makefile: 42
file content (62 lines) | stat: -rw-r--r-- 1,805 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#ifndef _BLASR_PACKED_DNA_SEQUENCE_HPP_
#define _BLASR_PACKED_DNA_SEQUENCE_HPP_

#include <fstream>

#include <pbdata/DNASequence.hpp>
#include <pbdata/NucConversion.hpp>
#include <pbdata/utils/BitUtils.hpp>

/*
 * Implements a structure that stores DNA using 3 bits per nucleotide.
 * The third bit is required to represent N's in the sequence.  Note
 * that operator[] returns a nucleotide by value, so the structure is
 * read-only through operator[]; a position is modified with
 * PackedDNASequence::Set(pos, value).
 */

// Storage unit for packed nucleotides (see PackedDNASequence).
using PackedDNAWord = unsigned int;

/// Declaration of a DNA sequence packed as 3-bit nucleotide codes inside
/// PackedDNAWord storage words.  Only declarations appear here; every
/// member function and the static mask tables are defined elsewhere.
class PackedDNASequence
{
public:
    // ---- Packing constants -------------------------------------------

    /// Nucleotides per storage word (10 codes x 3 bits = 30 bits).
    static const DNALength NucsPerWord = 10;

    /// Low three bits set (binary 111): the width of one nucleotide code.
    static const PackedDNAWord NucMask = 0x7;

    /// Bit 2 of every 3-bit group: binary 100100100...
    static const PackedDNAWord Mask2All = 0x24924924;

    /// Bit 1 of every 3-bit group: binary 010010010...
    static const PackedDNAWord Mask1All = 0x12492492;

    /// Bit 0 of every 3-bit group: binary 001001001...
    static const PackedDNAWord Mask0All = 0x09249249;

    // ---- Mask tables (defined outside this header) --------------------
    // NOTE(review): the size, indexing and exact meaning of each table are
    // not visible from this declaration -- confirm against the definitions.
    static const PackedDNAWord xorMask[];
    static const PackedDNAWord NucPosMask[];
    static const PackedDNAWord NegMask[];
    static const PackedDNAWord MaskRL[];
    static const PackedDNAWord MaskLR[];

    // ---- State --------------------------------------------------------
    // Member order is intentionally unchanged so object layout is preserved.

    /// Pointer to the packed storage words.
    /// NOTE(review): ownership is not visible here.  If the destructor
    /// releases this buffer, copying an instance would be unsafe because no
    /// copy constructor / copy assignment is declared -- confirm.
    PackedDNAWord *seq;

    /// Integer counters; presumably tallies related to CountInWord() and
    /// CountNuc() -- their use is not visible here, verify in the definitions.
    int nCountInWord;
    int nCountNuc;

    /// Presumably the number of nucleotides stored -- TODO confirm.
    DNALength length;

    /// Presumably the number of PackedDNAWord elements behind `seq`
    /// -- TODO confirm.
    DNALength arrayLength;

    // ---- Lifetime -----------------------------------------------------
    PackedDNASequence();
    ~PackedDNASequence();

    // ---- Construction / mutation --------------------------------------

    /// Prepares storage for `numberOfNucleotides` nucleotides.
    void Allocate(DNALength numberOfNucleotides);

    /// Fills this packed sequence from an unpacked DNASequence.
    void CreateFromDNASequence(DNASequence &dnaSeq);

    /// Stores the 3-bit code `threeBitValue` at position `pos`.  This is the
    /// write path; operator[] only hands back values.
    void Set(DNALength pos, Nucleotide threeBitValue);

    // ---- Element access -----------------------------------------------

    /// Returns (by value) the nucleotide at position `pos`.
    Nucleotide Get(DNALength pos);

    /// Read-only indexed access: the nucleotide comes back by value, so the
    /// sequence cannot be modified through this operator.
    Nucleotide operator[](DNALength pos);

    // ---- Counting -----------------------------------------------------

    /// Counts occurrences of `nuc` within a single `word`, restricted by
    /// `wordMask`.
    /// NOTE(review): the precise role of `wordMask` is defined in the
    /// implementation -- confirm there.
    DNALength CountInWord(PackedDNAWord word, PackedDNAWord wordMask, Nucleotide nuc);

    /// Counts occurrences of `nuc` between positions `start` and `end`.
    /// NOTE(review): whether `end` is inclusive is not visible here.
    DNALength CountNuc(DNALength start, DNALength end, Nucleotide nuc);

    // ---- Stream I/O ---------------------------------------------------

    /// Writes this object to `out`; the format is set by the implementation.
    void Write(std::ostream &out);

    /// Reads this object from `in`; expected to mirror Write() -- confirm.
    void Read(std::istream &in);

    /// Prints the sequence in unpacked form to `out`; `lineLength`
    /// (default 50) presumably sets the characters per output line.
    void PrintUnpacked(std::ostream &out, int lineLength = 50);
};

#endif  // _BLASR_PACKED_DNA_SEQUENCE_HPP_