File: HDFUtils.hpp

package info (click to toggle)
pbseqlib 5.3.4%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 7,020 kB
  • sloc: cpp: 77,246; python: 331; sh: 103; makefile: 42
file content (41 lines) | stat: -rw-r--r-- 1,599 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#ifndef _BLASR_HDF_UTILS_HPP_
#define _BLASR_HDF_UTILS_HPP_

#include <string>
#include <vector>

#include <hdf/HDFBasReader.hpp>
#include <hdf/HDFFile.hpp>
#include <hdf/HDFRegionTableReader.hpp>
#include <hdf/HDFScanDataReader.hpp>
#include <pbdata/reads/RegionTable.hpp>

// Given a PacBio (pls/plx/bas/bax/ccs/rgn).h5 file, which contains its movie
// name in group /ScanData/RunInfo attribute MovieName, return its movie name.
//
// Input : fileName - name of the h5 file to query (passed by value).
// Output: the movie name as a std::string.
std::string GetH5MovieName(std::string fileName);

// Given a vector of h5 files, return their movie names.
//
// Input : fileNames - names of the h5 files to query.
// Output: a vector of movie names.
// NOTE(review): presumably one entry per input file, in input order --
// confirm against the implementation.
std::vector<std::string> GetH5MovieNames(const std::vector<std::string>& fileNames);

// Given a PacBio rgn.h5 file, return the smallest and largest holeNumber in
// group /PulseData/Regions.
//
// Input : fileName - name of the h5 file to query (passed by value).
//         isRGN    - defaults to false.
// Output: a pair of hole numbers; presumably first is the smallest and
//         second the largest -- confirm against the implementation.
// NOTE(review): the description above speaks of an rgn.h5 file, yet isRGN
// defaults to false; presumably the flag selects between reading a region
// table and reading another kind of h5 file -- confirm.
std::pair<UInt, UInt> GetMinMaxHoleNumber(std::string fileName, bool isRGN = false);

// Return a vector of hole number pairs for the given file name.
//
// Input : fileName - a single file name (passed by value).
//         isRGN    - defaults to false.
// Output: a vector of (UInt, UInt) pairs.
// NOTE(review): presumably each pair is a (min, max) holeNumber range as
// returned by GetMinMaxHoleNumber, and fileName may refer to a file that
// lists several h5 files (e.g., a fofn) -- confirm against the
// implementation.
std::vector<std::pair<UInt, UInt> > GetMinMaxHoleNumbers(std::string fileName, bool isRGN = false);

// Pulse files in input.fofn and region tables in rgn.fofn may not be listed
// in matching order; return a mapping from plsFNs indices to rgnFNs indices.
//
// Input : plsFNs - pulse file names in input.fofn, e.g.,
//                  P=(p_0, ..., p_{n-1})
//         rgnFNs - region table file names in rgn.fofn, e.g.,
//                  R=(r_0, ..., r_{n-1})
// Output: mapping from plsFNs indices to rgnFNs indices, e.g.,
//                  M=(m_0, ..., m_{n-1})
//         so that for all i from 0 to n-1,
//                  r_{m_{i}} matches p_i
//
std::vector<int> MapPls2Rgn(const std::vector<std::string>& plsFNs,
                            const std::vector<std::string>& rgnFNs);

#endif