File: HDFZMWWriter.hpp

package info (click to toggle)
pbseqlib 5.3.4%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 7,020 kB
  • sloc: cpp: 77,246; python: 331; sh: 103; makefile: 42
file content (129 lines) | stat: -rw-r--r-- 3,966 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#ifndef _BLASR_HDF_HDFZMWWriter_HPP_
#define _BLASR_HDF_HDFZMWWriter_HPP_

#include <LibBlasrConfig.h>

#ifdef USE_PBBAM

#include <pbbam/BamRecord.h>

#include <hdf/BufferedHDF2DArray.hpp>
#include <hdf/BufferedHDFArray.hpp>
#include <hdf/HDFWriterBase.hpp>
#include <pbdata/SMRTSequence.hpp>

// Forward declarations only: HDFZMWWriter names both writers as friends,
// which does not require their full definitions.
class HDFBaseCallerWriter;
class HDFPulseCallerWriter;

/// \brief Writer for the ZMW group located under a parent group
///        (PulseCalls or BaseCalls).
///
/// The datasets managed by this class are ZMW/NumEvent, ZMW/HoleNumber,
/// ZMW/HoleStatus, ZMW/HoleXY and ZMW/BaseLineSigma.
class HDFZMWWriter : public HDFWriterBase
{
    // Both caller writers are granted access to the private members below.
    friend class HDFBaseCallerWriter;
    friend class HDFPulseCallerWriter;

public:
    /// \name Constructors and Destructors
    /// \{

    /// \param[in] filename     file name handed to the writer
    ///                         (presumably the output H5 file -- TODO confirm)
    /// \param[in] parentGroup  group under which the ZMW group lives
    HDFZMWWriter(const std::string& filename, HDFGroup& parentGroup);

    /// \param[in] filename      file name handed to the writer
    ///                          (presumably the output H5 file -- TODO confirm)
    /// \param[in] parentGroup   group under which the ZMW group lives
    /// \param[in] inPulseCalls  true if this ZMW is within PulseCalls
    /// \param[in] baseMap       base to channel index in H5
    HDFZMWWriter(const std::string& filename, HDFGroup& parentGroup, const bool inPulseCalls,
                 const std::map<char, size_t>& baseMap);

    ~HDFZMWWriter();
    /// \}

    /// \name Public Methods
    /// \{

    /// \returns true if this ZMW is within PulseCalls.
    bool InPulseCalls() const;

    /// \returns whether or not BaseLineSigma is to be written.
    bool HasBaseLineSigma() const;

    /// \brief Write the ZMW information of one read.
    /// \note For a read this
    ///       (1) adds the number of pulses to NumEvent if InPulseCalls(),
    ///           otherwise the number of bases,
    ///       (2) adds the zmw hole number of the read as a UInt to HoleNumber,
    ///       (3) adds the hole status (unsigned char) to HoleStatus,
    ///       (4) adds the hole coordinate xy as (int16_t, int16_t) to HoleXY.
    bool WriteOneZmw(const PacBio::BAM::BamRecord& read);

    /// \brief Overload of WriteOneZmw taking a SMRTSequence.
    bool WriteOneZmw(const SMRTSequence& read);

    /// \brief Write fake datasets under /PulseCalls/ZMW.
    bool WriteFakeDataSets();

    /// \returns the dataset (array) size tracked by this writer.
    uint32_t NumZMWs() const { return arrayLength_; }

    /// \note Flushes all data from cache to disc.
    void Flush();

    /// \note Closes this zmw group as well as child hdf groups.
    void Close();

    /// \}

private:
    /// \name Private Methods
    /// \{

    /// \note Initializes the child hdf groups under ZMW, including
    ///       NumEvent, HoleNumber, HoleStatus, HoleXY, and BaseLineSigma.
    /// \returns whether or not the child hdf groups were successfully initialized.
    bool InitializeChildHDFGroups();

    /// \note Writes the number of bases to ZMW/NumEvent if not InPulseCalls();
    ///       otherwise writes the number of pulses.
    bool _WriteNumEvent(const uint32_t numEvent);

    /// \note Writes HoleNumber.
    bool _WriteHoleNumber(const uint32_t holeNumber);

    /// \note Writes HoleXY.
    bool _WriteHoleXY(const int16_t holeX, const int16_t holeY);

    /// \note Writes HoleStatus; documented as always writing 'SEQUENCINGZMW'.
    ///       NOTE(review): the signature accepts an arbitrary holeStatus --
    ///       confirm against the implementation which value is written.
    bool _WriteHoleStatus(const unsigned char holeStatus);

    /// \note Writes BaseLineSigma if it is required to write and exists in read.
    bool _WriteBaseLineSigma(const PacBio::BAM::BamRecord& read);

    /// \note Adds attributes to HoleNumber, HoleStatus, HoleXY and BaseLineSigma.
    ///       NOTE(review): whether NumEvent also gets attributes is not
    ///       visible from this header -- confirm against the implementation.
    void _WriteAttributes();

    /// \}

    /// \name Private Data
    /// \{

    /// ZMW/NumEvent
    BufferedHDFArray<uint32_t> numEventArray_;
    /// ZMW/HoleNumber
    BufferedHDFArray<unsigned int> holeNumberArray_;
    /// ZMW/HoleStatus
    BufferedHDFArray<unsigned char> holeStatusArray_;
    /// ZMW/HoleXY
    BufferedHDF2DArray<int16_t> holeXYArray_;
    /// ZMW/BaseLineSigma
    BufferedHDF2DArray<float> baseLineSigmaArray_;

    /// ZMW group
    HDFGroup zmwGroup_;
    /// Parent group, PulseCalls or BaseCalls (held by reference)
    HDFGroup& parentGroup_;

    /// Map 'ACGT' to channel indices, defined in /ScanData/RunInfo/BaseMap
    std::map<char, size_t> baseMap_;
    /// true if parent group is PulseCalls.
    bool inPulseCalls_;
    /// dataset (array) size.
    uint32_t arrayLength_;

    /// \}
};

#endif  // end of #ifdef USE_PBBAM

#endif  // end of #ifndef _BLASR_HDF_HDFZMWWriter_HPP_