File: CompressedDNASequence.hpp

package info (click to toggle)
pbseqlib 5.3.4%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 7,020 kB
  • sloc: cpp: 77,246; python: 331; sh: 103; makefile: 42
file content (82 lines) | stat: -rw-r--r-- 2,366 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#ifndef _BLASR_COMPRESSED_DNA_SEQUENCE_HPP_
#define _BLASR_COMPRESSED_DNA_SEQUENCE_HPP_

#include <cstring>

#include <pbdata/defs.h>
#include <pbdata/Compare4BitCompressed.hpp>
#include <pbdata/DNASequence.hpp>
#include <pbdata/FASTASequence.hpp>
#include <pbdata/qvs/QualityValue.hpp>

// Storage type for one element of a compressed sequence: a single unsigned
// byte. CompressedDNASequence::Copy allocates its sequence buffer with it.
typedef unsigned char CompressedNucleotide;

class CompressedDNASequence : public DNASequence
{
    static const unsigned char MaskCount = 0xf;
    static const unsigned char MaskNuc = 0xf0;
    static const unsigned char ShiftCount = 4;

public:
    char *title;
    int titleLength;
    //
    // This is just a placeholder for now.
    // No extra data here, just the ability to decompress.  Right now
    // the utilities for the compressed dna sequences
    // are in CompressedSeqUtils.h, which could move here later.
    //
    QualityValue *qual;

    CompressedDNASequence()
    {
        const char t[] = "Compressed sequence\0";
        titleLength = strlen(t);
        title = ProtectedNew<char>(titleLength + 1);
        strcpy(title, t);
        title[titleLength] = '\0';
    }

    void MakeRC(CompressedDNASequence &rc)
    {
        rc.Allocate(length);
        DNALength i;
        for (i = 0; i < length; i++) {
            rc.seq[length - i - 1] = ReverseComplementNuc[ThreeBit[seq[i] & MaskCount]];
            rc.seq[length - i - 1] += (seq[i] & MaskNuc);
        }
        memcpy(rc.title, title, titleLength);
        rc.titleLength = titleLength;
    }

    Nucleotide operator[](DNALength i) { return GetNuc(i); }

    Nucleotide GetNuc(DNALength i) { return (seq[i] & MaskCount); }

    unsigned char GetCount(DNALength i) { return seq[i] >> ShiftCount; }

    char *GetName() { return (char *)title; }

    void Copy(FASTASequence &rhs)
    {
        seq = ProtectedNew<CompressedNucleotide>(rhs.length);
        memcpy(seq, rhs.seq, rhs.length);
        length = rhs.length;
        if (title != NULL) {
            delete[] title;
        }
        title = ProtectedNew<char>(rhs.titleLength + 1);
        memcpy(title, rhs.title, rhs.titleLength);
        titleLength = rhs.titleLength;
        title[titleLength] = '\0';
    }
    float GetAverageQuality() { return 0.0; }

    void SortHomopolymerQualities()
    {
        std::cout << "qualities are not implemented for compressed sequences." << std::endl;
        assert(0);
    }
};

#endif