File: FileOfFileNames.cpp

package info (click to toggle)
pbseqlib 5.3.5%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 7,020 kB
  • sloc: cpp: 77,250; python: 331; sh: 103; makefile: 41
file content (97 lines) | stat: -rw-r--r-- 3,122 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#include <alignment/utils/FileOfFileNames.hpp>
#include <hdf/HDFNewBasReader.hpp>

#include <cstdlib>

// Appends the input file(s) designated by fileName to fofnList.
//
// fileName  Either a single input file or a ".fofn" file listing one
//           input file per line.
// fofnList  Output list; resolved names are appended, existing entries
//           are left untouched.
//
// Each candidate is handled as follows:
//   - a ".fofn" entry is rejected (nesting is not supported): an error is
//     printed and the process exits with EXIT_FAILURE;
//   - a "bas.h5" entry is replaced by whatever Bas2Bax returns for it;
//   - anything else is appended unchanged.
void FileOfFileNames::StoreFileOrFileList(std::string fileName, std::vector<std::string> &fofnList)
{
    // Candidates are the lines of the FOFN, or just the given name itself.
    std::vector<std::string> candidates;
    if (!IsFOFN(fileName)) {
        candidates.push_back(fileName);
    } else {
        FOFNToList(fileName, candidates);
    }

    for (std::string &entry : candidates) {
        if (FileOfFileNames::IsFOFN(entry)) {
            std::cout << "ERROR. Nested File of File Names are not allowed. " << std::endl;
            std::exit(EXIT_FAILURE);
        }

        if (FileOfFileNames::IsBasH5(entry)) {
            const std::vector<std::string> parts = FileOfFileNames::Bas2Bax(entry);
            fofnList.insert(fofnList.end(), parts.begin(), parts.end());
            continue;
        }

        fofnList.push_back(entry);
    }
}

// Reads a file-of-file-names and appends every non-empty line to fofnList.
//
// fofnFileName  Path of the FOFN to read. The stream is opened through
//               CrucialOpen (defined elsewhere; presumably it aborts when
//               the file cannot be opened -- confirm against its definition).
// fofnList      Output list; one entry is appended per non-empty line, in
//               file order. Existing entries are left untouched.
//
// A single trailing '\r' is removed from each line so that FOFNs saved with
// CRLF line endings do not produce file names ending in a carriage return.
// Lines that are empty (after that removal) are skipped.
void FileOfFileNames::FOFNToList(std::string &fofnFileName, std::vector<std::string> &fofnList)
{
    std::ifstream fofnIn;
    CrucialOpen(fofnFileName, fofnIn);

    std::string name;
    // getline() fails only when no characters could be extracted, so the loop
    // still processes a final line that lacks a terminating newline.
    while (std::getline(fofnIn, name)) {
        // Drop the '\r' left behind by CRLF line endings.
        if (!name.empty() && name[name.size() - 1] == '\r') {
            name.erase(name.size() - 1);
        }
        if (!name.empty()) {
            fofnList.push_back(name);
        }
    }
}

// Returns true when the text following the last '.' in fileName is exactly
// "fofn" (case-sensitive); returns false otherwise, including when fileName
// contains no '.' at all.
bool FileOfFileNames::IsFOFN(std::string &fileName)
{
    const std::string::size_type lastDot = fileName.rfind('.');
    if (lastDot == std::string::npos) {
        return false;
    }
    // Compare everything after the dot against the expected extension.
    return fileName.compare(lastDot + 1, std::string::npos, "fofn") == 0;
}

// Returns true when fileName ends with "bas.h5" (case-sensitive).
//
// A name that consists of exactly "bas.h5" is accepted as well, so the
// result does not depend on whether a directory prefix is present
// ("bas.h5" and "./bas.h5" are treated alike).
bool FileOfFileNames::IsBasH5(std::string &fileName)
{
    const char suffix[] = "bas.h5";
    const std::string::size_type suffixLen = sizeof(suffix) - 1;  // exclude the '\0'

    if (fileName.size() < suffixLen) {
        return false;
    }
    return fileName.compare(fileName.size() - suffixLen, suffixLen, suffix) == 0;
}

std::vector<std::string> FileOfFileNames::Bas2Bax(std::string &basFN)
{
    // There are two types of bas.h5 files.
    // Before SMRT 2.0, bas.h5 files contain all the /PulseData data,
    // in this case, return the bas.h5.
    // After SMRT 2.0, bas.h5 files have been changed to only contain
    // paths to bax.h5 files (in the /MultiPart/Parts group), while
    // all base calls and QVs are in bax.h5 files. In this case,
    // return path to the bax.h5 files. Assumption is that bax.h5
    // files are in the same directory as bas.h5 file.
    std::vector<std::string> baxFNs;
    HDFNewBasReader reader;
    if (reader.Initialize(basFN) != 0) {
        baxFNs = reader.GetBaxFileNames();
    } else {
        baxFNs.push_back(basFN);
    }
    reader.Close();
    return baxFNs;
}

// Expands every entry of fileNames in place and returns the new entry count.
//
// Each entry is passed through StoreFileOrFileList, so a ".fofn" entry is
// replaced by the files it lists and a "bas.h5" entry by the result of
// Bas2Bax; other entries are kept as they are. The relative order of the
// inputs is preserved.
//
// fileNames  In/out: replaced by the expanded list.
// Returns the size of the expanded list, converted to int.
int FileOfFileNames::ExpandFileNameList(std::vector<std::string> &fileNames)
{
    std::vector<std::string> expanded;
    for (const std::string &entry : fileNames) {
        // StoreFileOrFileList only appends to its output list, so the
        // results of successive entries accumulate in input order.
        FileOfFileNames::StoreFileOrFileList(entry, expanded);
    }

    fileNames.swap(expanded);
    return static_cast<int>(fileNames.size());
}