1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
|
#include "fileio.h"
#include <unistd.h>
#include <dirent.h>
#include <string.h>
//#include "aclogger.h"
#include <set>
#include "meta.h"
#include "dirwalk.h"
using namespace std;
namespace acng
{
// Configuration values that are only declared here; their definition is not
// part of this file.
namespace cfg
{
// When non-zero, dnode::Walk runs each directory entry name through
// UrlUnescapeAppend() while building the child path instead of appending the
// raw name.
extern int stupidfs;
}
/**
 * One level of the recursive directory walk: the path being examined, the
 * stat data obtained for it, and a link to the node of the enclosing
 * directory (nullptr for the starting point).
 *
 * Walk() follows the m_parent chain to recognize a directory that already
 * appears among its own ancestors.
 */
struct dnode
{
	using tPairDevIno = std::pair<dev_t,ino_t>;
	using tDupeFilter = std::set<tPairDevIno>;

	/** @param parent Node of the enclosing directory, or nullptr for the root of the walk */
	explicit dnode(dnode *parent) : m_parent(parent), m_stinfo() {}

	// not to be copied; deleted so that an accidental copy fails at compile time
	dnode(const dnode&) = delete;
	dnode& operator=(const dnode&) = delete;

	/** Examine sPath, report it to the handler and descend if it is a directory. */
	bool Walk(IFileHandler *, tDupeFilter*, bool bFollowSymlinks);

	std::string sPath;    // path of the object this node stands for
	dnode *m_parent;      // node of the directory containing this one, not owned
	struct stat m_stinfo; // zeroed on construction, filled by stat()/lstat() in Walk()
};
/**
 * Examine the filesystem object named by sPath and report it to the handler.
 * A directory is scanned entry by entry (skipping "." and ".."), each entry is
 * walked recursively, and the directory itself is reported afterwards through
 * ProcessDirAfter.
 *
 * @param h Handler receiving the ProcessRegular / ProcessOthers /
 *        ProcessDirAfter calls
 * @param pFilter Optional set of (device, inode) keys of directories entered
 *        so far; when non-null, a directory whose key is already present is
 *        skipped
 * @param bFollowSymlinks If true, stat() is used so symlinks are resolved;
 *        otherwise lstat() is used and symlinks are skipped
 * @return For non-directories the result of the handler callback. For
 *         directories, false if the walk of some entry returned false (which
 *         also ends the scan of this directory) or if ProcessDirAfter returned
 *         false. Objects that cannot be stat'ed or opened, skipped symlinks
 *         and already visited directories yield true.
 */
bool dnode::Walk(IFileHandler *h, dnode::tDupeFilter *pFilter, bool bFollowSymlinks)
{
	if(bFollowSymlinks)
	{
		if(stat(sPath.c_str(), &m_stinfo))
			return true; // slight risk of missing information here... but ignoring is safer
	}
	else
	{
		if(lstat(sPath.c_str(), &m_stinfo))
			return true; // slight risk of missing information here... but ignoring is safer
		// yeah, and we ignore symlinks here
		if(S_ISLNK(m_stinfo.st_mode))
			return true;
	}

	if(S_ISREG(m_stinfo.st_mode)
#ifdef DEBUG
			|| S_ISBLK(m_stinfo.st_mode)
#endif
	)
		return h->ProcessRegular(sPath, m_stinfo);

	if(! S_ISDIR(m_stinfo.st_mode))
		return h->ProcessOthers(sPath, m_stinfo);

	// ok, we are a directory, scan it and descend where needed

	// seen this in the path before? symlink cycle?
	for(const dnode *cur=m_parent; cur!=nullptr; cur=cur->m_parent)
	{
		if(m_stinfo.st_dev == cur->m_stinfo.st_dev && m_stinfo.st_ino == cur->m_stinfo.st_ino)
			return true;
	}

	// also make sure we are not visiting the same directory through some symlink construct;
	// insert() reports through .second whether the key was new
	if(pFilter && !pFilter->insert(tPairDevIno(m_stinfo.st_dev, m_stinfo.st_ino)).second)
		return true; // visited this before, recursion detected

	bool bRet(true);
	{
		// Owns the directory handle for the duration of the scan, so it is
		// released on every way out of this scope, including an exception
		// escaping from a handler callback somewhere below.
		struct tDirCloser
		{
			DIR *m_dir;
			explicit tDirCloser(DIR *d) : m_dir(d) {}
			~tDirCloser() { if(m_dir) closedir(m_dir); }
			tDirCloser(const tDirCloser&) = delete;
			tDirCloser& operator=(const tDirCloser&) = delete;
		};
		tDirCloser dirGuard(opendir(sPath.c_str()));
		if(!dirGuard.m_dir) // weird, whatever... ignore...
			return true;

		// identical for all entries, build it only once
		const std::string sPrefix(sPath+sPathSepUnix);
		// one node is reused for all entries of this directory
		dnode childbuf(this);
		struct dirent *dp;
		while( nullptr != (dp = readdir(dirGuard.m_dir)) )
		{
			if(0 == strcmp(dp->d_name, ".") || 0 == strcmp(dp->d_name, ".."))
				continue;

			childbuf.sPath=sPrefix;
			if(cfg::stupidfs)
				UrlUnescapeAppend(dp->d_name, childbuf.sPath);
			else
				childbuf.sPath+=dp->d_name;

			bRet=childbuf.Walk(h, pFilter, bFollowSymlinks);
			if(!bRet)
				break;
		}
	} // directory handle is closed here, before the directory itself is reported

	// the directory is always reported, even when the scan was cut short
	return h->ProcessDirAfter(sPath, m_stinfo) && bRet;
}
bool IFileHandler::DirectoryWalk(const string & sRoot, IFileHandler *h, bool bFilterDoubleDirVisit,
bool bFollowSymlinks)
{
dnode root(nullptr);
dnode::tDupeFilter filter;
root.sPath=sRoot;
return root.Walk(h, bFilterDoubleDirVisit ? &filter : nullptr, bFollowSymlinks);
}
// XXX: create some shortcut? wasting CPU cycles for virtual call PLUS std::function wrapper
bool IFileHandler::FindFiles(const mstring & sRootDir, IFileHandler::output_receiver callBack, bool bFilterDoubleDirVisit,
bool bFollowSymlinks)
{
struct tFileGrabber : public IFileHandler
{
IFileHandler::output_receiver &m_cb;
bool ProcessRegular(cmstring &sPath, const struct stat &st) override { return m_cb(sPath, st);}
bool ProcessOthers(cmstring &sPath, const struct stat &) override {return true;};
bool ProcessDirAfter(cmstring &sPath, const struct stat &) override {return true;};
tFileGrabber(IFileHandler::output_receiver &ret) : m_cb(ret) {}
} cb(callBack);
return DirectoryWalk(sRootDir, &cb, bFilterDoubleDirVisit, bFollowSymlinks);
}
}
|