File: HDFUtils.cpp

package info (click to toggle)
pbseqlib 5.3.4%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 7,020 kB
  • sloc: cpp: 77,246; python: 331; sh: 103; makefile: 42
file content (83 lines) | stat: -rw-r--r-- 2,929 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#include <hdf/HDFUtils.hpp>

std::string GetH5MovieName(std::string fileName)
{
    HDFScanDataReader reader;
    return reader.GetMovieName_and_Close(fileName);
}

// Looks up the movie name of every file in `fileNames` via GetH5MovieName.
//
// The result holds one entry per input file, in the same order as
// `fileNames`; an empty input yields an empty result.
std::vector<std::string> GetH5MovieNames(const std::vector<std::string>& fileNames)
{
    std::vector<std::string> movieNames;
    for (const std::string& path : fileNames) {
        movieNames.push_back(GetH5MovieName(path));
    }
    return movieNames;
}

std::vector<std::pair<UInt, UInt> > GetMinMaxHoleNumbers(const std::vector<std::string>& fileNames,
                                                         bool isRGN)
{
    std::vector<std::pair<UInt, UInt> > ret;
    for (size_t i = 0; i < fileNames.size(); i++) {
        ret.push_back(GetMinMaxHoleNumber(fileNames[i], isRGN));
    }
    return ret;
}

std::pair<UInt, UInt> GetMinMaxHoleNumber(std::string fileName, bool isRGN)
{
    UInt minHole, maxHole;

    if (isRGN) {  // is region table
        HDFRegionTableReader rgnReader;
        rgnReader.Initialize(fileName);
        rgnReader.GetMinMaxHoleNumber(minHole, maxHole);
        rgnReader.Close();
    } else {  // is bas/bax/pls/plx/ccs.h5
        HDFBasReader basReader;
        basReader.Initialize(fileName);
        std::vector<UInt> holes;
        basReader.GetMinMaxHoleNumber(minHole, maxHole);
        basReader.Close();
    }
    return std::make_pair(minHole, maxHole);
}

std::vector<int> MapPls2Rgn(const std::vector<std::string>& plsFNs,
                            const std::vector<std::string>& rgnFNs)
{
    if (plsFNs.size() != rgnFNs.size() && rgnFNs.size() != 0) {
        std::cout << "ERROR, the number of plx/bax.h5 files and the number of "
                  << "region tables are not the same." << std::endl;
        std::exit(EXIT_FAILURE);
    }

    // Movie names of pulse files in P.
    std::vector<std::string> plsMovies = GetH5MovieNames(plsFNs);
    // Movie names of region tables in R.
    std::vector<std::string> rgnMovies = GetH5MovieNames(rgnFNs);

    // The first and last hole numbers of pulse files in P.
    std::vector<std::pair<UInt, UInt> > plsHoles = GetMinMaxHoleNumbers(plsFNs, false);
    // The first and last hole numbers of region tables in R.
    std::vector<std::pair<UInt, UInt> > rgnHoles = GetMinMaxHoleNumbers(rgnFNs, true);

    std::vector<int> ret;
    for (size_t i = 0; i < plsFNs.size(); i++) {
        size_t j = 0;
        for (; j < rgnFNs.size(); j++) {
            if (plsMovies[i] == rgnMovies[j] and plsHoles[i].first <= rgnHoles[j].first and
                plsHoles[i].second >= rgnHoles[j].second) {
                break;
            }
        }
        if (j >= rgnFNs.size()) {
            std::cout << "ERROR, could not find any region table for file " << plsFNs[i] << " ["
                      << plsHoles[i].first << ", " << plsHoles[i].second << "." << std::endl;
            std::exit(EXIT_FAILURE);
        }
        ret.push_back(j);
    }
    return ret;
}