File: readfile.h

package info (click to toggle)
recoll 1.43.13-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 16,956 kB
  • sloc: cpp: 104,864; python: 9,923; xml: 7,324; ansic: 6,447; sh: 1,252; perl: 166; makefile: 73
file content (119 lines) | stat: -rw-r--r-- 4,885 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/* Copyright (C) 2004-2025 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published by
 *   the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
#ifndef _READFILE_H_INCLUDED_
#define _READFILE_H_INCLUDED_

// Code for reading data which may be stored in plain or zlib format or inside a zip archive,
// and sending the data for processing by a sink object.
#include <sys/types.h>

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>

class FileScanUpstream;

/**
 * Data sink interface: derive from this class and implement data() to
 * receive the bytes produced by a scan. Instances cannot be copied.
 */
class FileScanDo {
public:
    FileScanDo() = default;
    virtual ~FileScanDo() = default;

    // Copying a sink is not allowed.
    FileScanDo(const FileScanDo&) = delete;
    FileScanDo& operator=(const FileScanDo&) = delete;

    /**
     * Initialize and allocate. The default implementation does nothing
     * and reports success.
     * @param size if set, lower bound of data size.
     * @param[out] reason set to error message in case of error.
     * @return false for error (file_scan will return), true if ok.
     */
    virtual bool init(int64_t /* size */, std::string * /* reason */)
    {
        return true;
    }

    /**
     * Process a chunk of data. Must be implemented by derived classes.
     * @param buf the data buffer.
     * @param cnt byte count.
     * @param[out] reason set to error message in case of error.
     * @return false for error (file_scan will return), true if ok.
     */
    virtual bool data(const char *buf, int cnt, std::string *reason) = 0;

    /**
     * Hook taking a FileScanUpstream pointer. The default implementation
     * ignores its argument.
     */
    virtual void setUpstream(FileScanUpstream*) {}
};

/** Open and read file, calling the FileScanDo data() method for each chunk.
 *
 * @param fn File name. Use empty value for stdin.
 * @param doer the data processor. The init() method will be called
 *     initially with a lower bound of the data size (may be used to
 *     reserve a buffer), or with a 0 size if nothing is known about the
 *     size. The data() method will be called for every chunk of data
 *     read.
 * @param startoffs Start offset. If not zero, will disable decompression
 *     (set to -1 to start at 0 with no decompression).
 * @param cnttoread Max bytes in output. Set to -1 for no limit.
 * @param[out] reason If not null, points to a string for storing an
 *     error message if the return value is false.
 * @param[out] md5p Only present when READFILE_ENABLE_MD5 is defined. If not
 *     null, points to a string to store the hex ascii md5 of the
 *     uncompressed data.
 * @return true if the operation ended normally, else false.
 */
bool file_scan(const std::string& fn, FileScanDo* doer, int64_t startoffs,
               int64_t cnttoread, std::string *reason
#ifdef READFILE_ENABLE_MD5
               , std::string *md5p
#endif
    );

/** Simplified file_scan() overload: no offset/cnt/md5 parameters.
 *
 * @param filename File name.
 * @param doer the data processor receiving the data.
 * @param[out] reason string pointer for an error message.
 *     NOTE(review): presumably may be null, as for the full file_scan() - confirm.
 */
bool file_scan(const std::string& filename, FileScanDo* doer, std::string *reason);

/** Same as file_scan, from a memory buffer. No zlib processing at the moment.
 *
 * @param data pointer to the start of the memory buffer.
 * @param cnt size of the buffer, in bytes.
 * @param doer the data processor receiving the data.
 * @param[out] reason string pointer for an error message.
 * @param[out] md5p Only present when READFILE_ENABLE_MD5 is defined.
 *     NOTE(review): presumably receives the hex md5 of the data, as for
 *     file_scan - confirm against the implementation.
 */
bool string_scan(const char *data, size_t cnt, FileScanDo* doer,  std::string *reason
#ifdef READFILE_ENABLE_MD5
                 , std::string *md5p
#endif
    );

// Zip archive support: these declarations only exist when READFILE_ENABLE_MINIZ is defined.
#if defined(READFILE_ENABLE_MINIZ)
/** Process a zip archive
 * @param filename name of the archive file.
 * @param membername member to process, or if "*", list member names instead through the data()
 *  callback.
 * @param doer the data processor receiving the data (or the member names).
 * @param[out] reason string pointer for an error message.
 */
bool file_scan(const std::string& filename, const std::string& membername,
               FileScanDo* doer, std::string *reason);
/** Variant taking a memory buffer (data, cnt) instead of a file name.
 * NOTE(review): presumably the buffer holds the zip archive itself - confirm.
 */
bool string_scan(const char* data, size_t cnt, const std::string& membername,
                 FileScanDo* doer, std::string *reason);
// Interface using reusable object to save init time
class FileScanSourceZip;
/** Create a reusable FileScanSourceZip object from a file name. */
std::shared_ptr<FileScanSourceZip> init_scan(const std::string& filename, std::string *reason);
/** Create a reusable FileScanSourceZip object from a memory buffer (data, cnt). */
std::shared_ptr<FileScanSourceZip> init_scan(const char *data, size_t cnt, std::string *reason);
/** Scan using an object returned by init_scan().
 * NOTE(review): membername presumably has the same meaning as in the zip file_scan() - confirm.
 */
bool zip_scan(
    std::shared_ptr<FileScanSourceZip> zip, const std::string& membername, FileScanDo* doer);
#endif

/**
 * Scanner-based helper: read file into string.
 * @param filename name of the file to read.
 * @param[out] data string receiving the file data.
 * @param[out] reason optional (defaults to nullptr) string pointer for an error message.
 * @return true for ok, false else
 */
bool file_to_string(const std::string& filename, std::string& data, std::string *reason = nullptr);

/**
 * Scanner-based helper: Read file chunk into string.
 * @param filename name of the file to read.
 * @param[out] data string receiving the data.
 * @param offs set to -1 for going from the start without decompression.
 * @param cnt Set to -1 for going to eof. Note that cnt is a size_t, so -1
 *     converts to the largest size_t value.
 * @param[out] reason optional (defaults to nullptr) string pointer for an error message.
 */
bool file_to_string(const std::string& filename, std::string& data,
                    int64_t offs, size_t cnt, std::string *reason = nullptr);

#endif /* _READFILE_H_INCLUDED_ */