File: HDFRegionTableReader.cpp

package info (click to toggle)
pbseqlib 5.3.4%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 7,020 kB
  • sloc: cpp: 77,246; python: 331; sh: 103; makefile: 42
file content (148 lines) | stat: -rw-r--r-- 4,394 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#include <hdf/HDFRegionTableReader.hpp>

#include <cassert>

// Opens `regionTableFileName` read-only and binds this reader to the
// `Regions` object inside the `PulseData` group, together with its
// `ColumnNames`, `RegionTypes`, `RegionDescriptions` and `RegionSources`
// attributes.
//
// regionTableFileName: path of the HDF file to open.
// fileAccPropList:     file-access property list forwarded to the open call.
//
// Returns 1 on success (row count cached, cursor reset to row 0, reader
// marked initialized). Returns 0 if the file cannot be opened, if the
// `PulseData` group cannot be initialized, if `PulseData` holds no
// `Regions` object, or if `Regions` or any of the four attributes fails to
// initialize. On a 0 return the reader is not marked initialized.
int HDFRegionTableReader::Initialize(std::string &regionTableFileName,
                                     const H5::FileAccPropList &fileAccPropList)
{
    // Open the file; on an HDF5 error print its detail message and give up.
    try {
        regionTableFile.Open(regionTableFileName.c_str(), H5F_ACC_RDONLY, fileAccPropList);
    } catch (H5::Exception &openError) {
        std::cout << openError.getDetailMsg() << std::endl;
        return 0;
    }

    if (pulseDataGroup.Initialize(regionTableFile.rootGroup, "PulseData") == 0) return 0;

    // Remember whether the file carries a region table at all before
    // deciding whether to go on.
    fileContainsRegionTable = (pulseDataGroup.ContainsObject("Regions") != 0);
    if (!fileContainsRegionTable) return 0;

    if (regions.Initialize(pulseDataGroup, "Regions") == 0) return 0;

    // The attributes are initialized in a fixed order; the first failure
    // stops the chain (short-circuit) and aborts initialization.
    const bool attributesReady = columnNames.Initialize(regions, "ColumnNames") != 0 &&
                                 regionTypes.Initialize(regions, "RegionTypes") != 0 &&
                                 regionDescriptions.Initialize(regions, "RegionDescriptions") != 0 &&
                                 regionSources.Initialize(regions, "RegionSources") != 0;
    if (!attributesReady) return 0;

    nRows = regions.GetNRows();
    curRow = 0;
    isInitialized_ = true;
    return 1;
}

// Reports the reader's initialized flag: set to true at the end of a
// successful Initialize() and cleared again by Close().
bool HDFRegionTableReader::IsInitialized(void) const
{
    return isInitialized_;
}

bool HDFRegionTableReader::HasRegionTable(void) const
{
    assert(IsInitialized() && "HDFRegionTable is not initialize!");
    return fileContainsRegionTable;
}

// Reads the row at the current cursor position into `annotation.row` and
// advances the cursor by one.
//
// Returns 1 when a row was read. Returns 0, without touching `annotation`
// or the cursor, when the file has no region table or the cursor is already
// at the end of the table. Must only be called on an initialized reader
// (asserted).
int HDFRegionTableReader::GetNext(RegionAnnotation &annotation)
{
    assert(IsInitialized() && "HDFRegionTable is not initialize!");

    const bool atEnd = (curRow == nRows);
    if (!fileContainsRegionTable || atEnd) return 0;

    // Fetch exactly one row: [curRow, curRow + 1).
    regions.Read(curRow, curRow + 1, annotation.row);
    curRow++;
    return 1;
}

// Marks the reader as uninitialized and closes every handle it holds.
//
// The state flags are cleared first, then the four attribute handles, the
// `PulseData` group, the `Regions` table and finally the file are closed,
// in that order.
// NOTE(review): the close order is kept exactly as written; whether the
// underlying wrappers require this order is not visible from this file.
void HDFRegionTableReader::Close()
{
    // Reset state flags so IsInitialized() reports false from here on.
    isInitialized_ = false;
    fileContainsRegionTable = false;
    // Attributes of the `Regions` table.
    columnNames.Close();
    regionTypes.Close();
    regionDescriptions.Close();
    regionSources.Close();
    // Containers: group, table, then the file itself.
    pulseDataGroup.Close();
    regions.Close();
    regionTableFile.Close();
}

// Loads the complete region table of the opened file into `table`.
//
// `table` is always reset first. If the file has no region table, `table`
// is left in that reset state and nothing else happens. Otherwise every row
// of `Regions` is read, along with whichever of the ColumnNames,
// RegionDescriptions and RegionSources attributes are initialized.
// RegionTypes is mandatory: if it is not initialized, an error is printed
// and the process exits with EXIT_FAILURE.
//
// Note that (1) there is NO GUARANTEE that the region annotations in the
// hdf5 `Regions` dataset are sorted in any particular order, so iterating
// over `Regions` does not traverse zmws in order.
// (2) the region table of a million zmws is approximately 5M.
//
// Rows are read with a local index that always starts at row 0, so the
// result does not depend on how far GetNext() has already advanced the
// cursor, and ReadTable() may be called more than once. When a table was
// read, the cursor is left at the end of the table, so a subsequent
// GetNext() returns 0.
//
// Must only be called on an initialized reader (asserted).
void HDFRegionTableReader::ReadTable(RegionTable &table)
{
    assert(IsInitialized() && "HDFRegionTable is not initialize!");
    table.Reset();

    if (!fileContainsRegionTable) return;

    // Read attributes.
    std::vector<std::string> names, types, descs, sources;
    if (columnNames.IsInitialized()) columnNames.Read(names);
    if (regionTypes.IsInitialized()) {
        regionTypes.Read(types);
    } else {
        std::cout << "ERROR MUST HAVE REGIONTYPES" << std::endl;
        std::exit(EXIT_FAILURE);
    }
    if (regionDescriptions.IsInitialized()) regionDescriptions.Read(descs);
    if (regionSources.IsInitialized()) regionSources.Read(sources);

    // Read region annotations, one row at a time, from the first row to the
    // last. A local index (same type as the cursor) is used instead of the
    // cursor itself: relying on `curRow == 0` was only checked by an assert,
    // which is compiled out under NDEBUG and would then silently leave the
    // leading rows default-constructed.
    std::vector<RegionAnnotation> ras;
    ras.resize(nRows);
    decltype(curRow) row = 0;
    for (; row < nRows; ++row) {
        regions.Read(row, row + 1, ras[row].row);
    }
    // Leave the cursor at the end of the table.
    curRow = row;

    // Reconstruct table
    table.ConstructTable(ras, types);
    table.ColumnNames(names);
    table.RegionDescriptions(descs);
    table.RegionSources(sources);
}

// Scans every row of the region table and reports the smallest and largest
// hole number found.
//
// minHole, maxHole: outputs. They are written only if at least one row is
// read; when GetNext() yields no rows they are left unmodified.
//
// The scan rewinds the cursor to row 0 and walks the table with GetNext();
// the cursor position held on entry is put back before returning. Must only
// be called on an initialized reader (asserted).
void HDFRegionTableReader::GetMinMaxHoleNumber(UInt &minHole, UInt &maxHole)
{
    assert(IsInitialized() && "HDFRegionTable is not initialize!");

    // Hole numbers are not necessarily stored in ascending order, so the
    // first and last rows cannot be taken as the extremes: look at all rows.
    const UInt resumeRow = curRow;
    curRow = 0;

    RegionAnnotation current;
    bool seenAny = false;
    while (GetNext(current) == 1) {
        const UInt hole = current.GetHoleNumber();
        if (!seenAny) {
            // First row seeds both extremes.
            minHole = hole;
            maxHole = hole;
            seenAny = true;
            continue;
        }
        if (minHole > hole) minHole = hole;
        if (maxHole < hole) maxHole = hole;
    }

    curRow = resumeRow;
}