File: CompressedSequence.hpp

package info (click to toggle)
pbseqlib 0~20161219-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 5,924 kB
  • ctags: 5,123
  • sloc: cpp: 82,727; makefile: 305; python: 239; sh: 8
file content (100 lines) | stat: -rw-r--r-- 2,371 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#ifndef _BLASR_COMPRESSED_SEQUENCE_HPP_
#define _BLASR_COMPRESSED_SEQUENCE_HPP_
#include <cassert>
#include <stdint.h>
#include <iostream>
#include <fstream>
#include <ostream>
#include <cstring>
#include "utils.hpp"
#include "Types.h"
#include "Enumerations.h"
#include "NucConversion.hpp"
#include "DNASequence.hpp"
#include "qvs/QualityValue.hpp"
#include "reads/ZMWGroupEntry.hpp"
#include "FASTASequence.hpp"
#include "ReverseCompressIndex.hpp"

// Byte-sized (unsigned char) element type for compressed sequence data.
// NOTE(review): how the bits of each byte are split between a nucleotide code
// and a run count is decided by code outside this declaration -- confirm there
// before relying on a particular packing.
typedef unsigned char CompressedNucleotide;

/// FASTASequence subclass that declares homopolymer-compression operations.
///
/// Only declarations appear in this class body; the member definitions are
/// presumably supplied by CompressedSequenceImpl.hpp, which this header
/// includes after the class -- confirm there before relying on any behaviour
/// described below.
///
/// @tparam T_Sequence  sequence type that FourBitDecompressHomopolymers()
///                     writes its output into.
template<typename T_Sequence>
class CompressedSequence : public FASTASequence {
private:
    // Integer flags. NOTE(review): presumably switched on by SetHasIndex()
    // and SetHasTitle() respectively -- verify against the definitions.
    int hasIndex;
    int hasTitle;

    // Reverse index object. NOTE(review): presumably populated by the
    // Build*ReverseIndex() members and consulted by the Lookup*() members.
    ReverseCompressIndex index;

    // Bit-field constants for a packed byte: a low-nibble mask, a high-nibble
    // mask and a 4-bit shift. NOTE(review): the names suggest which nibble
    // holds what, but the actual usage is not visible here -- check the
    // definitions rather than trusting the names.
    static const unsigned char MaskCount  = 0xf;
    static const unsigned char MaskNuc    = 0xf0;
    static const unsigned char ShiftCount = 4;

public:
    // Original author's note, kept for context: this member is only a
    // placeholder for now -- no extra data is stored, just the ability to
    // decompress. The utilities for compressed DNA sequences currently live
    // in CompressedSeqUtils.h and could move into this class later.
    QualityValue *qual;

    // ---- Construction, destruction and storage release -------------------

    CompressedSequence();
    ~CompressedSequence();

    // NOTE(review): presumably releases storage held by this object; which
    // buffers are owned is not visible from the declaration.
    void Free();

    // ---- Copying ----------------------------------------------------------

    void CopyConfiguration(CompressedSequence<T_Sequence> &rhs);

    void ShallowCopy(CompressedSequence<T_Sequence> &rhs);

    void Copy(FASTASequence &rhs);

    // Takes the destination by reference; presumably fills `rc` with the
    // reverse complement of this sequence -- TODO confirm.
    void MakeRC(CompressedSequence<T_Sequence> &rc);

    // ---- Per-position access ----------------------------------------------

    Nucleotide operator[](DNALength i);

    Nucleotide GetNuc(DNALength i);

    // Overload taking a position in the sequence.
    unsigned char GetCount(DNALength i);

    // Overload taking a single byte value rather than a position. Note the
    // return type differs from the positional overload (char vs unsigned
    // char).
    char GetCount(unsigned char ch);

    char *GetName();

    // ---- Quality values ---------------------------------------------------

    float GetAverageQuality();

    void SortHomopolymerQualities();

    // ---- Flags and file I/O -----------------------------------------------

    void SetHasTitle();

    void SetHasIndex();

    // Both take the file name by value.
    void Write(std::string outFileName);

    void Read(std::string inFileName);

    // ---- Reverse index construction and lookup ----------------------------

    int BuildFourBitReverseIndex(int binSize);

    int BuildReverseIndex(int maxRun, int binSize);

    GenomeLength Lookup4BitCompressedSequencePos(int cpPos);

    int LookupSequencePos(int cpPos);

    // ---- Four-bit homopolymer compression ---------------------------------

    DNALength FourBitCompressHomopolymers();

    // Writes into `decompSeq` for the range given by `start` and `end`.
    // NOTE(review): whether `end` is inclusive is not visible here.
    DNALength FourBitDecompressHomopolymers(int start, int end,
                                            T_Sequence &decompSeq);

    // Static form operates on a caller-supplied buffer of `seqLength`
    // elements; the member form takes no arguments (presumably it checks
    // this object's own sequence -- confirm).
    static int Only4BitACTG(CompressedNucleotide *seq, int seqLength);

    int Only4BitACTG();

    void RemoveCompressionCounts();

    DNALength CondenseHomopolymers();
};

#include "CompressedSequenceImpl.hpp"

#endif // _BLASR_COMPRESSED_SEQUENCE_HPP_