File: archive.cpp

package info (click to toggle)
martchus-cpp-utilities 5.33.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,396 kB
  • sloc: cpp: 12,679; awk: 18; ansic: 12; makefile: 10
file content (265 lines) | stat: -rw-r--r-- 9,125 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
#include "./archive.h"

#include "../conversion/stringbuilder.h"

#include <archive.h>
#include <archive_entry.h>

#include <filesystem>
#include <utility>

using namespace CppUtilities;

namespace CppUtilities {

/*!
 * \brief Destroys the ArchiveException.
 * \remarks Defined out-of-line in this translation unit; there is nothing to clean up beyond
 *          what the compiler-generated destructor already does.
 */
ArchiveException::~ArchiveException() = default;

/// \cond
///
// Directory callback which registers every reported directory as a key in the referenced file map.
struct AddDirectoryToFileMap {
    // map that receives the directory keys; it is only referenced, not owned
    FileMap &fileMap;

    // Ensures an element for the specified directory path exists (via the subscript operator).
    // Always returns false.
    bool operator()(std::string_view path)
    {
        auto directoryKey = std::string(path);
        fileMap[std::move(directoryKey)];
        return false;
    }
};

// File callback which appends every reported file to the element of its directory in the referenced file map.
struct AddFileToFileMap {
    // map that receives the files; it is only referenced, not owned
    FileMap &fileMap;

    // Moves the specified file into the element stored under the specified directory path.
    // Always returns false.
    bool operator()(std::string_view directoryPath, ArchiveFile &&file)
    {
        auto &filesOfDirectory = fileMap[std::string(directoryPath)];
        filesOfDirectory.emplace_back(std::move(file));
        return false;
    }
};

struct ArchiveHandle {
    explicit ArchiveHandle()
        : handle(archive_read_new())
    {
    }

    ~ArchiveHandle()
    {
        if (handle) {
            archive_read_free(handle);
        }
    }

    ArchiveHandle(const ArchiveHandle &) = delete;
    ArchiveHandle(ArchiveHandle &&) = delete;

    void close(std::string_view archiveName, std::string_view errorMessage)
    {
        if (!handle) {
            return;
        }
        if (const auto returnCode = archive_read_free(std::exchange(handle, nullptr)); returnCode != ARCHIVE_OK) {
            throw ArchiveException(errorMessage.empty() ? argsToString("Unable to free archive \"", archiveName, '\"')
                                                        : argsToString("Unable to free archive \"", archiveName, "\" after error: ", errorMessage));
        }
    }

    operator struct archive *()
    {
        return handle;
    }

    struct archive *handle;
};

// Iterates over the entries of the already opened archive and reports directories, regular files and
// symlinks to the specified handlers. Entries of any other type are skipped.
// - ar: opened read handle; it is released via ArchiveHandle::close() before the function returns normally
// - archiveName: name of the archive; only used in error messages
// - isFileRelevant: optional predicate invoked with the full path, the file name and the permissions of
//   each file/symlink; entries it rejects are skipped without reading their content
// - fileHandler: invoked with the directory part of the path and the file for each relevant regular file
//   and symlink; returning true stops the iteration
// - directoryHandler: optional; invoked with the path (trailing slashes removed) of each directory entry;
//   returning true stops the iteration
// Throws ArchiveException if the entry cannot be allocated, if libarchive reports an error after the
// iteration or if the archive cannot be freed.
void walkThroughArchiveInternal(ArchiveHandle &ar, std::string_view archiveName, const FilePredicate &isFileRelevant, FileHandler &&fileHandler,
    DirectoryHandler &&directoryHandler)
{
    // own the entry via a scope guard so it is released on every exit path, including exceptions
    // thrown by the callbacks or by string allocations
    struct EntryGuard {
        explicit EntryGuard(struct archive_entry *allocatedEntry)
            : entry(allocatedEntry)
        {
        }
        ~EntryGuard()
        {
            if (entry) {
                archive_entry_free(entry);
            }
        }
        EntryGuard(const EntryGuard &) = delete;
        EntryGuard &operator=(const EntryGuard &) = delete;
        struct archive_entry *const entry;
    };
    const EntryGuard entryGuard(archive_entry_new());
    struct archive_entry *const entry = entryGuard.entry;
    if (!entry) {
        throw ArchiveException(argsToString("Unable to allocate entry for reading archive \"", archiveName, '\"'));
    }

    // iterate through all archive entries
    auto fileContent = std::string();
    while (archive_read_next_header2(ar, entry) == ARCHIVE_OK) {
        // check entry type (only dirs, files and symlinks relevant here)
        const auto entryType(archive_entry_filetype(entry));
        if (entryType != AE_IFDIR && entryType != AE_IFREG && entryType != AE_IFLNK) {
            continue;
        }

        // get file path, preferring the UTF-8 representation and skipping entries without any path
        const char *filePath = archive_entry_pathname_utf8(entry);
        if (!filePath) {
            filePath = archive_entry_pathname(entry);
        }
        if (!filePath) {
            continue;
        }

        // get permissions
        const mode_t perm = archive_entry_perm(entry);

        // add directories explicitly to get the entire tree though skipping irrelevant files
        if (entryType == AE_IFDIR) {
            if (!directoryHandler) {
                continue;
            }
            // remove trailing slashes: dirEnd ends up one past the last character which is not a slash
            const char *dirEnd = filePath;
            for (const char *i = filePath; *i; ++i) {
                if (*i != '/') {
                    dirEnd = i + 1;
                }
            }
            if (directoryHandler(std::string_view(filePath, static_cast<std::size_t>(dirEnd - filePath)))) {
                break;
            }
            continue;
        }

        // split the path into dir and fileName at the last slash (dir is empty if there is no slash)
        const char *fileName = filePath, *dirEnd = filePath;
        for (const char *i = filePath; *i; ++i) {
            if (*i == '/') {
                fileName = i + 1;
                dirEnd = i;
            }
        }

        // prevent looking into irrelevant files
        if (isFileRelevant && !isFileRelevant(filePath, fileName, perm)) {
            continue;
        }

        // read timestamps
        const auto creationTime = DateTime::fromTimeStampGmt(archive_entry_ctime(entry));
        const auto modificationTime = DateTime::fromTimeStampGmt(archive_entry_mtime(entry));

        // read symlink
        if (entryType == AE_IFLNK) {
            // the target may be unavailable as UTF-8 (or at all); constructing a std::string from a
            // null pointer is undefined behavior so fall back to the raw target and then to an empty string
            const char *linkTarget = archive_entry_symlink_utf8(entry);
            if (!linkTarget) {
                linkTarget = archive_entry_symlink(entry);
            }
            if (fileHandler(std::string_view(filePath, static_cast<std::string::size_type>(dirEnd - filePath)),
                    ArchiveFile(fileName, linkTarget ? std::string(linkTarget) : std::string(), ArchiveFileType::Link, creationTime,
                        modificationTime))) {
                break;
            }
            continue;
        }

        // determine file size to pre-allocate buffer for file content
        const la_int64_t fileSize = archive_entry_size(entry);
        fileContent.clear();
        if (fileSize > 0) {
            fileContent.reserve(static_cast<std::string::size_type>(fileSize));
        }

        // read file content block by block
        // NOTE(review): a failing read only ends this loop, so the content gathered so far is still passed
        // to the handler; the block offset is ignored as well - confirm both are intended
        const char *buff;
        auto size = std::size_t();
        auto offset = la_int64_t();
        for (;;) {
            const auto returnCode = archive_read_data_block(ar, reinterpret_cast<const void **>(&buff), &size, &offset);
            if (returnCode == ARCHIVE_EOF || returnCode < ARCHIVE_OK) {
                break;
            }
            fileContent.append(buff, size);
        }

        // move it to results
        if (fileHandler(std::string_view(filePath, static_cast<std::string::size_type>(dirEnd - filePath)),
                ArchiveFile(fileName, std::move(fileContent), ArchiveFileType::Regular, creationTime, modificationTime))) {
            break;
        }
    }

    // check for errors; copy the message as it is needed after the archive has been freed
    const auto *const archiveError = archive_error_string(ar);
    const auto errorMessage = archiveError ? std::string(archiveError) : std::string();

    // free the archive (the entry is released by the guard when leaving the function)
    ar.close(archiveName, errorMessage);
    if (archiveError) {
        throw ArchiveException(argsToString("An error occurred when reading archive \"", archiveName, "\": ", errorMessage));
    }
}

/// \endcond

/*!
 * \brief Invokes callbacks for files and directories in the specified archive.
 */
void walkThroughArchiveFromBuffer(std::string_view archiveData, std::string_view archiveName, const FilePredicate &isFileRelevant,
    FileHandler &&fileHandler, DirectoryHandler &&directoryHandler)
{
    // refuse opening empty buffer
    if (archiveData.empty()) {
        throw ArchiveException("Unable to open archive \"" % archiveName + "\": archive data is empty");
    }
    // open archive buffer using libarchive
    auto ar = ArchiveHandle();
    archive_read_support_filter_all(ar);
    archive_read_support_format_all(ar);
    const auto returnCode = archive_read_open_memory(ar, archiveData.data(), archiveData.size());
    if (returnCode != ARCHIVE_OK) {
        if (const char *const error = archive_error_string(ar)) {
            throw ArchiveException("Unable to open/read archive \"" % archiveName % "\": " + error);
        } else {
            throw ArchiveException("Unable to open/read archive \"" % archiveName + "\": unable to open archive from memory");
        }
    }
    walkThroughArchiveInternal(ar, archiveName, isFileRelevant, std::move(fileHandler), std::move(directoryHandler));
}

/*!
 * \brief Extracts the archive contained in the specified buffer into a FileMap.
 * \param archiveData Specifies the raw archive data.
 * \param archiveName Specifies the name of the archive which is passed on for error messages.
 * \param isFileRelevant Specifies the predicate passed on to the traversal.
 * \returns Returns the map filled by AddFileToFileMap and AddDirectoryToFileMap during the traversal.
 */
FileMap extractFilesFromBuffer(std::string_view archiveData, std::string_view archiveName, const FilePredicate &isFileRelevant)
{
    FileMap extracted;
    // both callbacks write into the same map
    walkThroughArchiveFromBuffer(archiveData, archiveName, isFileRelevant, AddFileToFileMap{ extracted }, AddDirectoryToFileMap{ extracted });
    return extracted;
}

/*!
 * \brief Invokes callbacks for files and directories in the archive at the specified path.
 * \param archivePath Specifies the path of the archive file; it does not need to be NUL-terminated.
 * \param isFileRelevant Specifies the predicate passed on to the traversal.
 * \param fileHandler Specifies the callback passed on to the traversal for files.
 * \param directoryHandler Specifies the callback passed on to the traversal for directories.
 * \throws Throws ArchiveException if \a archivePath is empty, the file size cannot be determined,
 *         the file is empty or libarchive is unable to open the file.
 */
void walkThroughArchive(
    std::string_view archivePath, const FilePredicate &isFileRelevant, FileHandler &&fileHandler, DirectoryHandler &&directoryHandler)
{
    // refuse opening without a path
    if (archivePath.empty()) {
        throw ArchiveException("Unable to open archive: no path specified");
    }

    // refuse opening inaccessible or empty files
    auto ec = std::error_code();
    const auto size = std::filesystem::file_size(archivePath, ec);
    if (ec) {
        throw ArchiveException("Unable to determine size of \"" % archivePath % "\": " + ec.message());
    }
    if (!size) {
        throw ArchiveException("Unable to open archive \"" % archivePath + "\": file is empty");
    }

    // libarchive expects a NUL-terminated C string but the data of a std::string_view is not
    // guaranteed to be terminated; hence pass a terminated copy instead of archivePath.data()
    const auto terminatedPath = std::string(archivePath);

    // open archive file using libarchive (10240 is the block size passed to libarchive)
    auto ar = ArchiveHandle();
    archive_read_support_filter_all(ar);
    archive_read_support_format_all(ar);
    const auto returnCode = archive_read_open_filename(ar, terminatedPath.c_str(), 10240);
    if (returnCode != ARCHIVE_OK) {
        if (const char *const error = archive_error_string(ar)) {
            throw ArchiveException("Unable to open/read archive \"" % archivePath % "\": " + error);
        } else {
            throw ArchiveException("Unable to open/read archive \"" % archivePath + "\": unable to open archive from file");
        }
    }
    walkThroughArchiveInternal(ar, archivePath, isFileRelevant, std::move(fileHandler), std::move(directoryHandler));
}

/*!
 * \brief Extracts the archive at the specified path into a FileMap.
 * \param archivePath Specifies the path of the archive file.
 * \param isFileRelevant Specifies the predicate passed on to the traversal.
 * \returns Returns the map filled by AddFileToFileMap and AddDirectoryToFileMap during the traversal.
 */
FileMap extractFiles(std::string_view archivePath, const FilePredicate &isFileRelevant)
{
    FileMap extracted;
    // both callbacks write into the same map
    walkThroughArchive(archivePath, isFileRelevant, AddFileToFileMap{ extracted }, AddDirectoryToFileMap{ extracted });
    return extracted;
}

} // namespace CppUtilities