1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
|
#include <alignment/utils/FileOfFileNames.hpp>
#include <hdf/HDFNewBasReader.hpp>
#include <cstdlib>
/// Appends the input file(s) denoted by `fileName` to `fofnList`.
///
/// If `fileName` is a file-of-file-names (see IsFOFN), every entry listed in
/// it is processed; otherwise `fileName` itself is the only entry. Each entry
/// is then handled as follows:
///   - an entry that is itself a FOFN is rejected: an error is printed and the
///     process exits with EXIT_FAILURE (nested lists are not supported);
///   - a bas.h5 entry (see IsBasH5) is replaced by whatever Bas2Bax returns;
///   - any other entry is appended unchanged.
///
/// @param fileName  A single input file or a FOFN path.
/// @param fofnList  Output list; results are appended, existing entries kept.
void FileOfFileNames::StoreFileOrFileList(std::string fileName, std::vector<std::string> &fofnList)
{
    // Collect the raw candidates first: either the lines of the list file or
    // just the single name that was passed in.
    std::vector<std::string> candidates;
    if (!IsFOFN(fileName)) {
        candidates.push_back(fileName);
    } else {
        FOFNToList(fileName, candidates);
    }

    for (std::string &candidate : candidates) {
        if (FileOfFileNames::IsFOFN(candidate)) {
            std::cout << "ERROR. Nested File of File Names are not allowed. " << std::endl;
            std::exit(EXIT_FAILURE);
        }

        if (FileOfFileNames::IsBasH5(candidate)) {
            // A bas.h5 may stand for several bax.h5 parts; splice them all in.
            std::vector<std::string> parts = FileOfFileNames::Bas2Bax(candidate);
            fofnList.insert(fofnList.end(), parts.begin(), parts.end());
            continue;
        }

        fofnList.push_back(candidate);
    }
}
/// Reads a file-of-file-names and appends each non-empty line to `fofnList`.
///
/// @param fofnFileName  Path of the list file; it is opened through
///                      CrucialOpen (NOTE(review): presumably that helper
///                      terminates on failure — confirm against its definition).
/// @param fofnList      Output list; names are appended, existing entries kept.
///
/// Empty lines are skipped. A single trailing carriage return is removed from
/// each line, so list files written with CRLF line endings produce usable
/// names instead of names ending in '\r' (which could neither be opened nor
/// recognised by the ".fofn" / "bas.h5" suffix checks).
void FileOfFileNames::FOFNToList(std::string &fofnFileName, std::vector<std::string> &fofnList)
{
    std::ifstream fofnIn;
    CrucialOpen(fofnFileName, fofnIn);

    std::string name;
    while (std::getline(fofnIn, name)) {
        // std::getline consumes only the '\n' delimiter; drop the '\r' that a
        // CRLF-terminated line leaves at the end of the extracted text.
        if (!name.empty() && name[name.size() - 1] == '\r') {
            name.erase(name.size() - 1);
        }
        if (!name.empty()) {
            fofnList.push_back(name);
        }
    }
}
/// Tells whether `fileName` names a file-of-file-names.
///
/// The decision is purely textual: everything after the last '.' in the name
/// must be exactly "fofn" (case-sensitive). A name without any '.' is never
/// a FOFN.
///
/// @param fileName  Name or path to inspect; it is not modified.
/// @return true when the text following the last '.' equals "fofn".
bool FileOfFileNames::IsFOFN(std::string &fileName)
{
    const std::string::size_type lastDot = fileName.rfind('.');
    if (lastDot == std::string::npos) {
        return false;
    }
    // Compare the tail after the dot in place, without copying it out.
    return fileName.compare(lastDot + 1, std::string::npos, "fofn") == 0;
}
/// Tells whether `fileName` ends with "bas.h5".
///
/// The name must be strictly longer than the six-character suffix, so the
/// bare string "bas.h5" on its own is not accepted.
///
/// @param fileName  Name or path to inspect; it is not modified.
/// @return true when the name is longer than, and ends with, "bas.h5".
bool FileOfFileNames::IsBasH5(std::string &fileName)
{
    const std::string suffix("bas.h5");
    if (fileName.size() <= suffix.size()) {
        return false;
    }
    const std::string::size_type tailStart = fileName.size() - suffix.size();
    return fileName.compare(tailStart, suffix.size(), suffix) == 0;
}
/// Maps a bas.h5 file to the list of files that actually need to be read.
///
/// Background, as recorded by the original author:
///   - Before SMRT 2.0 a bas.h5 file holds all of the /PulseData data itself;
///     for such a file the bas.h5 path is returned as the only element.
///   - From SMRT 2.0 on, a bas.h5 file only holds paths to bax.h5 files (in
///     the /MultiPart/Parts group) and the base calls and QVs live in those
///     bax.h5 files; for such a file the bax.h5 paths are returned. The
///     bax.h5 files are assumed to sit in the same directory as the bas.h5.
///
/// The two cases are told apart by the result of HDFNewBasReader::Initialize:
/// a non-zero result selects the bax.h5 list, zero falls back to `basFN`.
///
/// @param basFN  Path of the bas.h5 file.
/// @return The bax.h5 file names reported by the reader, or `basFN` alone.
std::vector<std::string> FileOfFileNames::Bas2Bax(std::string &basFN)
{
    std::vector<std::string> baxFNs;
    HDFNewBasReader reader;

    const bool readerInitialized = (reader.Initialize(basFN) != 0);
    if (!readerInitialized) {
        // Not readable as a multi-part file: hand back the bas.h5 itself.
        baxFNs.push_back(basFN);
    } else {
        baxFNs = reader.GetBaxFileNames();
    }

    // The reader is closed on both paths, as in the original flow.
    reader.Close();
    return baxFNs;
}
/// Expands every entry of `fileNames` in place.
///
/// Each entry is passed through StoreFileOrFileList, so FOFN entries are
/// replaced by the files they list and bas.h5 entries by the result of
/// Bas2Bax; the relative order of the inputs is preserved.
///
/// @param fileNames  In/out: replaced by the expanded list.
/// @return The number of entries in the expanded list.
int FileOfFileNames::ExpandFileNameList(std::vector<std::string> &fileNames)
{
    std::vector<std::string> expanded;
    for (const std::string &entry : fileNames) {
        // StoreFileOrFileList only appends to its output list, so the
        // accumulated result can be extended directly.
        FileOfFileNames::StoreFileOrFileList(entry, expanded);
    }

    fileNames = expanded;
    return static_cast<int>(fileNames.size());
}
|