File: smallut.h

package info (click to toggle)
recoll 1.43.13-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 16,956 kB
  • sloc: cpp: 104,864; python: 9,923; xml: 7,324; ansic: 6,447; sh: 1,252; perl: 166; makefile: 73
file content (312 lines) | stat: -rw-r--r-- 12,015 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
/* Copyright (C) 2006-2022 J.F.Dockes
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 *   02110-1301 USA
 */
#ifndef _SMALLUT_H_INCLUDED_
#define _SMALLUT_H_INCLUDED_

#ifdef __MINGW32__
#define _POSIX_C_SOURCE 200809L
#endif

#include <cstdint>
#include <ctime>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct tm;

namespace MedocUtils {

// Miscellaneous mostly string-oriented small utilities
// Note that none of the following code knows about utf-8.

// Call this before going multithread.
void smallut_init_mt();

#ifndef SMALLUT_DISABLE_MACROS
#ifndef MIN
#define MIN(A,B) (((A)<(B)) ? (A) : (B))
#endif
#ifndef MAX
#define MAX(A,B) (((A)>(B)) ? (A) : (B))
#endif
#ifndef deleteZ
#define deleteZ(X) do {delete X;X = nullptr;} while(0)
#endif
#ifndef PRETEND_USE
#define PRETEND_USE(var) ((void)(var))
#endif
#ifndef VERSION_AT_LEAST
#define VERSION_AT_LEAST(LIBMAJ,LIBMIN,LIBREV,TARGMAJ,TARGMIN,TARGREV)  \
    ((LIBMAJ) > (TARGMAJ) ||                                            \
     ((LIBMAJ) == (TARGMAJ) &&                                          \
      ((LIBMIN) > (TARGMIN) ||                                          \
       ((LIBMIN) == (TARGMIN) && (LIBREV) >= (TARGREV)))))
#endif
#endif /* SMALLUT_DISABLE_MACROS */

// Case-insensitive compare. ASCII ONLY !
extern int stringicmp(const std::string& s1, const std::string& s2);

// For find_if etc.
struct StringIcmpPred {
    explicit StringIcmpPred(const std::string& s1)
        : m_s1(s1) {
    }
    bool operator()(const std::string& s2) const {
        return stringicmp(m_s1, s2) == 0;
    }
    const std::string& m_s1;
};

extern int stringlowercmp(const std::string& s1, // already lower
                          const std::string& s2);
extern int stringuppercmp(const std::string& s1, // already upper
                          const std::string& s2);

extern void stringtolower(std::string& io);
extern std::string stringtolower(const std::string& io);
extern void stringtoupper(std::string& io);
extern std::string stringtoupper(const std::string& io);
extern bool beginswith(const std::string& b, const std::string& sml);
inline bool startswith(const std::string& b, const std::string& sml) {
    return beginswith(b, sml);
}
extern bool endswith(const std::string& bg, const std::string& sml);

#ifdef _WIN32
// Conversion between utf-8 and wide char file names.
bool wchartoutf8(const wchar_t *in, std::string& out, int len = 0);
std::string wchartoutf8(const wchar_t *in, int len = 0);
bool utf8towchar(const std::string& in, wchar_t *out, int obytescap);
std::unique_ptr<wchar_t[]> utf8towchar(const std::string& in);
#define strcasecmp _stricmp
#define strncasecmp _strnicmp
#define localtime_r(a,b) localtime_s(b,a)
#define strtok_r strtok_s
#endif // _WIN32

/** Note for all templated functions:
 * By default, smallut.cpp has explicit instantiations for common
 * containers (list, vector, set, etc.). If this is not enough, or
 * conversely, if you want to minimize the module size, you can chose
 * the instantiations by defining the SMALLUT_EXTERNAL_INSTANTIATIONS
 * compilation flag, and defining the instances in a file named
 * smallut_instantiations.h
 */

/**
 * Parse input string into list of strings. See instantiation note above.
 *
 * Token delimiter is " \t\n" except inside dquotes. dquote inside
 * dquotes can be escaped with \ etc...
 * Input is handled a byte at a time, things will work as long as
 * space tab etc. have the ascii values and can't appear as part of a
 * multibyte char. utf-8 ok but so are the iso-8859-x and surely
 * others. addseps do have to be single-bytes
 */
template <class T> bool stringToStrings(const std::string& s, T& tokens,
                                        const std::string& addseps = "");

/**
 * Inverse operation. See instantiation note above.
 */
template <class T> void stringsToString(const T& tokens, std::string& s);
template <class T> std::string stringsToString(const T& tokens);

/**
 * Strings to CSV string. tokens containing the separator are quoted (")
 * " inside tokens is escaped as "" ([word "quote"] =>["word ""quote"""]
 * See instantiation note above.
 */
template <class T> std::string stringsToCSV(const T& tokens, char sep = ',');

/** Find longest common prefix for bunch of strings */
template <class T> std::string commonprefix(const T& values, bool aspaths = false);

/** Same, progressive way. Initially call with curpfx=="". Returns newprefix.
  *** If the return value is empty, STOP CALLING ***, else we start again with the next value. */
std::string commonprefix(const std::string& curpfx, const std::string& value, bool aspaths);

/**
 * Split input string. No handling of quoting.
 */
extern void stringToTokens(const std::string& s,
                           std::vector<std::string>& tokens,
                           const std::string& delims = " \t",
                           bool skipinit = true, bool allowempty = false);

/** Like toTokens but with multichar separator */
extern void stringSplitString(const std::string& str,
                              std::vector<std::string>& tokens,
                              const std::string& sep);

/** Join strings with string separator. No handling of quoting. */
extern std::string tokensToString(const std::vector<std::string>& tokens,
                                  const std::string& sep = " ");

/** Convert string to boolean */
extern bool stringToBool(const std::string& s);

/** Remove instances of characters belonging to set (default {space,
    tab}) at beginning and end of input string */
extern std::string& trimstring(std::string& s, const char *ws = " \t");
extern std::string& rtrimstring(std::string& s, const char *ws = " \t");
extern std::string& ltrimstring(std::string& s, const char *ws = " \t");

/** Escape things like < or & by turning them into entities */
extern std::string escapeHtml(const std::string& in);

/** Double-quote and escape to produce C source code string (prog generation) */
extern std::string makeCString(const std::string& in);

/** Replace some chars with spaces (ie: newline chars). */
extern std::string neutchars(const std::string& str, const std::string& chars, char rep = ' ');
extern void neutchars(const std::string& str, std::string& out,
                      const std::string& chars, char rep = ' ');

/** Turn string into something that won't be expanded by a shell. In practise
 *  quote with double-quotes and escape $`\ */
extern std::string escapeShell(const std::string& in);

/** Truncate a string to a given maxlength, avoiding cutting off midword if reasonably possible. */
extern std::string truncate_to_word(const std::string& input, std::string::size_type maxlen);

/** Convert byte count into unit (KB/MB...) appropriate for display */
std::string displayableBytes(int64_t size);

/** Break big string into lines */
std::string breakIntoLines(const std::string& in, unsigned int ll = 100, unsigned int maxlines = 50);

/** Small utility to substitute printf-like percents cmds in a string */
bool pcSubst(const std::string& in, std::string& out, const std::map<char, std::string>& subs);
/** Substitute printf-like percents and also %(key) */
bool pcSubst(
    const std::string& in, std::string& out, const std::map<std::string, std::string>& subs);
/** Substitute printf-like percents and %(nm), using result of function call */
bool pcSubst(
    const std::string& i, std::string& o, const std::function<std::string(const std::string&)>&);

/** Stupid little smart buffer handler avoiding value-initialization when not needed (e.g. for using
    as read buffer **/
class DirtySmartBuf {
public:
    explicit DirtySmartBuf(size_t sz) : m_buf(new char[sz]) {}
    ~DirtySmartBuf() { delete [] m_buf; }
    DirtySmartBuf(const DirtySmartBuf&) = delete;
    DirtySmartBuf& operator=(const DirtySmartBuf&) = delete;
    char *buf() { return m_buf; }
  private:
    char *m_buf;
};

/** Append system error message */
void catstrerror(std::string *reason, const char *what, int _errno);

/** Portable timegm. MS C has _mkgmtime, but there is a bug in Gminw which makes it inaccessible */
time_t portable_timegm(struct tm *tm);

inline void leftzeropad(std::string &s, unsigned len) {
    if (!s.empty() && s.length() < len) {
        s = s.insert(0, len - s.length(), '0');
    }
}

// Print binary string in hexa, separate bytes with character separ if not zero
// (e.g. ac:23:0c:4f:46:fd)
extern std::string hexprint(const std::string& in, char separ= 0);

#ifndef SMALLUT_NO_REGEX
// A class to solve platorm/compiler issues for simple regex
// matches. Uses the appropriate native lib under the hood.
// This always uses extended regexp syntax.
class SimpleRegexp {
public:
    enum Flags {SRE_NONE = 0, SRE_ICASE = 1, SRE_NOSUB = 2};
    /// @param nmatch must be >= the number of parenthesed subexp in exp
    SimpleRegexp(const std::string& exp, int flags, int nmatch = 0);
    ~SimpleRegexp();
    SimpleRegexp(const SimpleRegexp&) = delete;
    SimpleRegexp& operator=(const SimpleRegexp&) = delete;
    /// Match input against exp, return true if matches
    bool simpleMatch(const std::string& val) const;
    /// After simpleMatch success, get nth submatch, 0 is the whole
    /// match, 1 first parentheses, etc.
    std::string getMatch(const std::string& val, int i) const;
    /// Calls simpleMatch()
    bool operator() (const std::string& val) const;

    /// Replace the first occurrence of regexp.
    std::string simpleSub(const std::string& input, const std::string& repl);

    /// Check after construction
    bool ok() const;

    class Internal;
private:
    std::unique_ptr<Internal> m;
};
#endif // SMALLUT_NO_REGEX

inline void copybits(unsigned int& to, unsigned int from, unsigned int mask)
{
    to = (to & ~mask) | (from & mask);
}

/// Utilities for printing names for defined values (Ex: O_RDONLY->"O_RDONLY")

/// Entries for the descriptive table
struct CharFlags {
    CharFlags(int v, const char *y, const char *n=nullptr)
        : value(v), yesname(y), noname(n) {}
    unsigned int value; // Flag or value
    const char *yesname;// String to print if flag set or equal
    const char *noname; // String to print if flag not set (unused for values)
};

/// Helper macro for the common case where we want to print the
/// flag/value defined name
#define CHARFLAGENTRY(NM) {NM, #NM}

/// Translate a bitfield into string description
extern std::string flagsToString(const std::vector<CharFlags>&, unsigned int val);

/// Translate a value into a name
extern std::string valToString(const std::vector<CharFlags>&, unsigned int val);

/// Decode percent-encoded string
extern std::string pc_decode(const std::string&);

/// Sort alphanumeric strings with integer ordering for the numeric parts.
extern void sortAlphanumStrings(std::vector<std::string> &strings);

/// Parse HTTP range header value into a vector of offset pairs. -1 is used as a placeholder
/// wherever a part of a range is absent: either for a missing end or for a suffix range (absent
/// start meaning count from the end).
bool parseHTTPRanges(const std::string& ranges, std::vector<std::pair<int64_t, int64_t>>& oranges);

void millisleep(int millis);


} // End namespace MedocUtils

using namespace MedocUtils;

#endif /* _SMALLUT_H_INCLUDED_ */