File: HDFPulseDataFile.cpp

package info (click to toggle)
pbseqlib 5.3.5%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 7,020 kB
  • sloc: cpp: 77,250; python: 331; sh: 103; makefile: 41
file content (159 lines) | stat: -rw-r--r-- 4,950 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#include <hdf/HDFPulseDataFile.hpp>

DSLength HDFPulseDataFile::GetAllReadLengths(std::vector<DNALength> &readLengths)
{
    nReads = static_cast<UInt>(zmwReader.numEventArray.arrayLength);
    readLengths.resize(nReads);
    zmwReader.numEventArray.Read(0, nReads, &readLengths[0]);
    return readLengths.size();
}

void HDFPulseDataFile::CheckMemoryAllocation(long allocSize, long allocLimit, const char *fieldName)
{
    if (allocSize > allocLimit) {
        if (fieldName == NULL) {
            std::cout << "Allocating too large of memory" << std::endl;
        } else {
            std::cout << "Allocate size " << allocSize << " > allocate limit " << allocLimit
                      << std::endl;
            std::cout << "ERROR! Reading the dataset " << fieldName << " will use too much memory."
                      << std::endl;
            std::cout << "The pls/bas file is too large, exiting." << std::endl;
        }
        std::exit(EXIT_FAILURE);
    }
}

HDFPulseDataFile::HDFPulseDataFile()
{
    pulseDataGroupName = "PulseData";
    nReads = 0;
    useScanData = false;
    closeFileOnExit = false;
    maxAllocNElements = INT_MAX;
    preparedForRandomAccess = false;
    rootGroupPtr = NULL;
}

void HDFPulseDataFile::PrepareForRandomAccess()
{
    std::vector<DNALength> offset_;
    GetAllReadLengths(offset_);
    // type of read length of a single read : DNALength
    // type of total read length of all reads in plx.h5: DSLength
    eventOffset.assign(offset_.begin(), offset_.end());
    DSLength curOffset = 0;
    for (size_t i = 0; i < eventOffset.size(); i++) {
        DSLength curLength = eventOffset[i];
        eventOffset[i] = curOffset;
        curOffset = curOffset + curLength;
    }
    nReads = static_cast<UInt>(eventOffset.size());
    preparedForRandomAccess = true;
}

int HDFPulseDataFile::OpenHDFFile(std::string fileName, const H5::FileAccPropList &fileAccPropList)
{

    try {
        H5::FileAccPropList propList = fileAccPropList;
        H5::Exception::dontPrint();
        hdfBasFile.openFile(fileName.c_str(), H5F_ACC_RDONLY, propList);
    } catch (H5::Exception &e) {
        std::cout << "ERROR, could not open hdf file" << fileName << ", exiting." << std::endl;
        std::exit(EXIT_FAILURE);
    }
    closeFileOnExit = true;
    return 1;
}

//
// All pulse data files contain the "PulseData" group name.
//
//
int HDFPulseDataFile::InitializePulseDataFile(std::string fileName,
                                              const H5::FileAccPropList &fileAccPropList)
{

    if (OpenHDFFile(fileName, fileAccPropList) == 0) return 0;
    return 1;
}

int HDFPulseDataFile::Initialize(std::string fileName, const H5::FileAccPropList &fileAccPropList)
{

    if (InitializePulseDataFile(fileName, fileAccPropList) == 0) {
        return 0;
    }
    //
    // The pulse group is contained directly below the root group.
    //
    if (rootGroup.Initialize(hdfBasFile, "/") == 0) {
        return 0;
    }
    rootGroupPtr = &rootGroup;
    return Initialize();
}

//
// Initialize inside another open group.
//
int HDFPulseDataFile::Initialize(HDFGroup *rootGroupP)
{
    rootGroupPtr = rootGroupP;
    return Initialize();
}

//
// Initialize all fields
//
int HDFPulseDataFile::Initialize()
{
    preparedForRandomAccess = false;
    if (InitializePulseGroup() == 0) return 0;
    if (rootGroupPtr->ContainsObject("ScanData")) {
        if (scanDataReader.Initialize(rootGroupPtr) == 0) {
            return 0;
        } else {
            useScanData = true;
        }
    }
    return 1;
}

int HDFPulseDataFile::InitializePulseGroup()
{
    if (pulseDataGroup.Initialize(rootGroupPtr->group, pulseDataGroupName) == 0) return 0;
    return 1;
}

size_t HDFPulseDataFile::GetAllHoleNumbers(std::vector<unsigned int> &holeNumbers)
{
    CheckMemoryAllocation(zmwReader.holeNumberArray.arrayLength, maxAllocNElements,
                          "HoleNumbers (base)");
    holeNumbers.resize(nReads);
    zmwReader.holeNumberArray.Read(0, nReads, &holeNumbers[0]);
    return holeNumbers.size();
}

void HDFPulseDataFile::Close()
{
    if (useScanData) {
        scanDataReader.Close();
    }

    pulseDataGroup.Close();
    if (rootGroupPtr == &rootGroup) {
        rootGroup.Close();
    }
    /*
       std::cout << "there are " <<  hdfBasFile.getObjCount(H5F_OBJ_FILE) << " open files upon closing." <<std::endl;
       std::cout << "there are " <<  hdfBasFile.getObjCount(H5F_OBJ_DATASET) << " open datasets upon closing." <<std::endl;
       std::cout << "there are " <<  hdfBasFile.getObjCount(H5F_OBJ_GROUP) << " open groups upon closing." <<std::endl;
       std::cout << "there are " <<  hdfBasFile.getObjCount(H5F_OBJ_DATATYPE) << " open datatypes upon closing." <<std::endl;
       std::cout << "there are " <<  hdfBasFile.getObjCount(H5F_OBJ_ATTR) << " open attributes upon closing." <<std::endl;
       */
    if (closeFileOnExit) {
        hdfBasFile.close();
    }
}