/*GPL*START*
*
* tstring - NUL byte tolerant sophisticated string class
*
* Copyright (C) 1997-2001 by Johannes Overmann <Johannes.Overmann@gmx.de>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
* *GPL*END*/
#ifndef _ngw_tstring_h_
#define _ngw_tstring_h_
#include <stdio.h>
#include <stdarg.h>
#include <ctype.h>
#include <string.h>
#include <limits.h>
#include "tvector.h"
#include "texception.h"
using namespace std;
/**@name null tolerant string class */
/*@{*/
/// NUL byte tolerant string class with a shared, reference counted representation
class tstring {
public:
    // invalid index marker (default 'end' of substr(), "not found" result of firstOccurence())
    static const size_t npos = static_cast<size_t>(-1);
    // character class flags for scanToken()
    enum {ALPHA=1, NUM=2, DIGIT=2, LOWER=4, UPPER=8, PRINT=16, XDIGIT=32,
          SPACE=64, ALNUM=1|2, PUNCT=128, CNTRL=256, GRAPH=1024,
          ALL=2048, NONE=0};
    /// case flags for modify case (used together with UPPER and LOWER from the enum above)
    enum {NOT=0, CAPITALIZE=-1};

private:
    // internal string representation: a small header which is directly
    // followed in memory by the character data (see operator new below)
    class Rep {
    public:
        size_t len;       // length without term 0 byte
        size_t mem;       // allocated mem without term 0 byte
        int ref;          // reference count (>=1)
        bool vulnerable;  // true == always grab by clone, never by reference
                          // (the string has become vulnerable to the outside)
        // char data[mem+1]; string data follows (+1 for term 0 byte)

        // return pointer to string data
        // ('this + 1' means 'the byte following this object')
        char *data() {return reinterpret_cast<char *>(this + 1);}
        // character access (no bounds check)
        char& operator[] (size_t i) {return data()[i];}
        // reference: share this representation, or hand out a private copy if it is vulnerable
        Rep* grab() {if(vulnerable) return clone(); ++ref; return this;}
        // dereference: drop one reference and free the representation with the last one
        void release() {if(--ref == 0) delete this;}
        // copy this representation
        Rep *clone(size_t minmem = 0);
        // terminate string with 0 byte
        void terminate() {*(data()+len) = 0;} // set term 0 byte

        // static methods

        // operator new for this class: allocates the header plus tmem data bytes plus the term 0 byte
        // add a tag parameter to ensure that the signature of the delete operator does not collide with the (void*,size_t) overload
        static void * operator new (size_t size, bool /*tag*/, size_t tmem) {
            return ::operator new (size + tmem + 1);}
        // matching placement delete for the operator new above
        static void operator delete (void *p, bool /*tag*/, size_t) {
            ::operator delete (p); }
        // ordinary delete, used by 'delete this' in release()
        static void operator delete (void *p) {
            ::operator delete (p); }
        // create a new representation
        static Rep *create(size_t tmem);
        // return pointer to the null string representation (created on first use)
        static Rep * nulRep() {if(nul == 0) createNulRep(); return nul;}
        // return pointer to the zero string representation (string containing a literal 0: "0" (and not "\0"))
        static Rep * zeroRep() {if(zero == 0) createZeroRep(); return zero;}
        // create null string representation
        static void createNulRep();
        // create zero string representation
        static void createZeroRep();

    private:
        // static null string ("") representation
        static Rep* nul;
        static char nul_mem[];
        // static zero string ("0") representation
        static Rep* zero;
        static char zero_mem[];
        // forbid assignment
        Rep& operator=(const Rep&);
    };

public:
    /**@name constructor & destructor */
    /*@{*/
    /// default construction
    tstring(): rep(Rep::nulRep()->grab()) {}
    /// copy construction
    tstring(const tstring& a):rep(a.rep->grab()) {}
    /// init from cstring
    tstring(const char *s);
    /// extract bytearray s of length len
    tstring(const char *s, size_t len);
    /// create string of chars c with length n
    explicit tstring(char c, size_t n);
    /// char to string conversion
    explicit tstring(char c);
    /// int to string conversion
    explicit tstring(int i);
    /// int to string conversion with format
    explicit tstring(int i, const char *format);
    /// double to string conversion
    explicit tstring(double d, const char *format = "%g");
    /// destructor
    ~tstring() {rep->release();}
    /*@}*/

    /**@name main interface */
    /*@{*/
    /// return length in bytes
    size_t len() const {return rep->len;}
    /// return length in bytes
    size_t length() const {return rep->len;}
    /// return length in bytes
    size_t size() const {return rep->len;}
    /// clear string
    void clear() {replaceRep(Rep::nulRep()->grab());}
    /// explicit conversion to c string
    // const char *operator*() const {return rep->data();}
    /// explicit conversion to c string
    const char *c_str() const {return rep->data();}
    /// explicit conversion to c string
    const char *data() const { return rep->data();}
    /// direct raw data access: use with caution
    char *rawdata() { invulnerableDetach(); return rep->data(); }
    /// return true if string is empty, else false
    bool empty() const {return rep->len == 0;}
    /// append string
    tstring& operator += (const tstring& a);
    /// append cstring
    tstring& operator += (const char *a);
    /// append char
    tstring& operator += (char c);
    /// append byte array a of length alen
    tstring& append(const char *a, int alen);
    /// assign string a to this
    tstring& operator = (const tstring& a);
    /// direct character access: const/readonly
    char operator [] (size_t i) const;
    /// direct character access: read/write
    char& operator [] (size_t i);
    /// substring extraction (len=end-start)
    tstring substr(size_t start, size_t end = npos) const;
    /// ASCII to number conversion
    bool toLong(long& long_out, int base = 0) const;
    bool toInt(int& int_out, int base = 0) const;
    /// convenience wrapper around toInt(): the success flag is discarded and
    /// the value left in a zero initialized int is returned
    int getInt(int base = 0) const { int i = 0; toInt(i, base); return i; }
    bool toDouble(double& double_out) const;
    bool toBool(bool& bool_out) const;
    /*@}*/

    /**@name scanning */
    /*@{*/
    /// return a scanned token with scanner
    tstring scanToken(size_t& scanner, int flags,
                      const char *allow=0, const char *forbid=0,
                      bool allow_quoted=false) const;
    /// scan a token or quoted string to out with scanner
    tstring scanString(size_t& scanner, int flags,
                       const char *allow=0, const char *forbid=0) const {
        return scanToken(scanner, flags, allow, forbid, true);}
    /// scan a token up to char upto (scanner is left on upto or at the end of the string)
    tstring scanUpTo(size_t& scanner, char upto) const {
        // keep the start index as size_t: an int would truncate large offsets
        const size_t start(scanner);
        while((scanner < rep->len)&&((*rep)[scanner]!=upto)) ++scanner;
        return substr(start, scanner);}
    /// scan a token to out up to chars upto
    /// (note: strchr() also matches the terminating 0 of upto, so a 0 byte
    /// inside the string always stops the scan)
    tstring scanUpTo(size_t& scanner, const char *upto) const {
        // keep the start index as size_t: an int would truncate large offsets
        const size_t start(scanner);
        while((scanner < rep->len)&&(strchr(upto, (*rep)[scanner])==0))
            ++scanner;
        return substr(start, scanner);}
    /// return the rest of the scanned string and move scanner to the end
    /// (returns an empty string if scanner is already at or behind the end)
    tstring scanRest(size_t& scanner) const {
        if(scanner < rep->len) {
            // keep the start index as size_t: an int would truncate large offsets
            const size_t start(scanner);
            scanner = rep->len;
            return substr(start, scanner);
        }
        return tstring();
    }
    /// skip spaces
    void skipSpace(size_t& scanner) const {
        // isspace() requires a value representable as unsigned char (or EOF):
        // passing a negative plain char is undefined behaviour
        while((scanner < rep->len)
              && isspace(static_cast<unsigned char>((*rep)[scanner])))
            ++scanner;
    }
    /// perhaps skip one char c
    void perhapsSkipOneChar(size_t& scanner, char c) const
    {if((scanner < rep->len)&&((*rep)[scanner]==c)) ++scanner;}
    /// return true if the end of string (eos) is reached
    bool scanEOS(size_t scanner) const
    {return scanner >= rep->len;}
    /// return the last character in the string or 0 if empty
    char lastChar() const {return rep->len?(*rep)[rep->len-1]:0;}
    /// return the first character in the string or 0 if empty
    /// (for an empty string this reads the byte at index 0, which relies on
    /// the representation being terminated with a 0 byte)
    char firstChar() const {return (*rep)[0];}
    /// return true if entire string consists of whitespace
    bool consistsOfSpace() const;
    /// return true if string has prefix
    bool hasPrefix(const tstring& prefix) const;
    /// return true if string has suffix
    bool hasSuffix(const tstring& suffix) const;
    /// return index of first occurrence of char c or npos if not found
    size_t firstOccurence(char c) const;
    /// check whether char is contained or not
    bool contains(char c) const { return firstOccurence(c) != npos; }
    /// remove whitespace at beginning and end
    void cropSpace();
    /// remove whitespace at end
    void cropSpaceEnd();
    /// collapse whitespace
    void collapseSpace();
    /// replace char from with char to
    void translateChar(char from, char to);
    /// expand unprintable chars to C-style backslash sequences
    void expandUnprintable(char quotes = 0);
    /// backslashify backslash and quotes
    void backslashify();
    /// compile C-style backslash sequences back to unprintable chars
    void compileCString();
    /// truncate to maximal length max
    void truncate(size_t max);
    /// replace unprintable characters for safe printing
    void replaceUnprintable(bool only_ascii = true);
    /**
       remove quotes
       @param allow_bslash true == backslashing allowed to protect quotes
       @param crop_space   true == remove leading/trailing spaces not protected by quotes
    */
    void unquote(bool allow_bslash = true, bool crop_space = true);
    /// return and remove the first words that fit into a string of length max
    tstring getFitWords(size_t max); // throw(InvalidWidth);
    /// remove the first words that fit into a string of length max and return in block format
    tstring getFitWordsBlock(size_t max); // throw(InvalidWidth);
    /// remove html tags (level == number of open brackets before call, init:0)
    void removeHTMLTags(int& level);
    /*@}*/

    /**@name search/replace */
    /*@{*/
    /// replace substring search with replace, return number of replacements (not regexp, use TRegEx to match regular expressions)
    int searchReplace(const tstring& search, const tstring& replace,
                      bool ignore_case=false, bool whole_words=false,
                      bool preserve_case=false, int progress=0,
                      const tstring& pre_padstring=tstring(),
                      const tstring& post_padstring=tstring(), tvector<int> *match_pos=0, int max_num = INT_MAX);
    /// return number of occurrences of pat (not regexp) returns -1 on empty pat
    int search(const tstring& pat,
               bool ignore_case=false, bool whole_words=false,
               int progress=0, tvector<int> *match_pos=0) const; // throw(StringIsEmpty);
    /// replace substring
    void replace(size_t start, size_t len, const tstring &str);
    /*@}*/

    /**@name file I/O */
    /*@{*/
    /// read line from file like fgets, no line length limit
    bool readLine(FILE *file);
    /// write string to file, return number of bytes written
    size_t write(FILE *file) const;
    /// read len bytes from file to string, return bytes read
    size_t read(FILE *file, size_t len); // throw(InvalidWidth);
    /// read whole file into one string, return 0 on success -x on error
    int readFile(const char *filename);
    /// write string into file, return 0 on success -x on error
    int writeFile(const char *filename);
    /*@}*/

    /**@name filename manipulation */
    /*@{*/
    /// remove leading path from filename
    void extractFilename();
    /// remove part after last slash
    void extractPath();
    /// add a slash at the end if it is missing
    void addDirSlash();
    /// remove last char if last char is a slash
    void removeDirSlash();
    /// extract part after the last dot (empty string if no extension, leading dot is ignored)
    void extractFilenameExtension();
    /// make paths comparable (kill multislash, dots and resolve '..')
    void normalizePath();
    /// check for absolute path (true if the first character is a slash)
    bool isAbsolutePath() const {return (*rep)[0]=='/';}
    /// get truncated filename (for printing purposes)
    tstring shortFilename(size_t maxchar) const;
    /*@}*/

    /**@name misc */
    /*@{*/
    /// get percentage of nonprintable and nonspace chars (0.0 .. 100.0)
    double binaryPercentage() const;
    /// check for 0 in string (then its not a real cstring anymore)
    bool containsNulChar() const;
    /// get a pointer to the at most max last chars (useful for printf)
    const char *pSuf(size_t max) const;
    /// sprintf into this string
    void sprintf(const char *format, ...);
    /*@}*/

    /**@name case */
    /*@{*/
    /// convert to lower case
    void lower();
    /// convert to upper case
    void upper();
    /// convert to lower case, first char upper case
    void capitalize();
    /// check for lower case, empty string returns false
    bool isLower() const;
    /// check for upper case, empty string returns false
    bool isUpper() const;
    /// check for capitalized case, empty string returns false
    bool isCapitalized() const;
    /*@}*/

public:
    /**@name detach methods */
    /*@{*/
    /// detach from string pool, you should never need to call this
    void detach();
    // no, there is *not* a dangling pointer here (ref > 1)
    /** detach from string pool and make sure at least minsize bytes of mem are available
        (use this before the dirty version sprintf to make it clean)
        (use this before the clean version sprintf to make it fast)
    */
    void detachResize(size_t minsize);
    /// detach from string pool and declare that string might be externally modified (the string has become vulnerable)
    void invulnerableDetach();
    /*@}*/

private:
    // hidden string representation
    Rep *rep;
    // private methods
    // drop the current representation and adopt p (p must already be grabbed by the caller)
    void replaceRep(Rep *p) {rep->release(); rep = p;}

public:
    // compare helpers
    static int _string_cmp(const tstring& s1, const tstring& s2);
    static bool _string_equ(const tstring& s1, const tstring& s2);
};
/**@name concat operators */
/*@{*/
/// concatenate two strings
tstring operator + (const tstring& s1, const tstring& s2);
/// concatenate a C string and a string
tstring operator + (const char *s1, const tstring& s2);
/// concatenate a string and a C string
tstring operator + (const tstring& s1, const char *s2);
/// concatenate a single char and a string
tstring operator + (char s1, const tstring& s2);
/// concatenate a string and a single char
tstring operator + (const tstring& s1, char s2);
/*@}*/
/**@name compare operators */
/*@{*/
/// equality comparison of two strings
bool operator == (const tstring& s1, const tstring& s2);
/// equality comparison of a string with a C string
bool operator == (const tstring& s1, const char *s2);
/// equality comparison of a C string with a string
bool operator == (const char *s1, const tstring& s2);
/// inequality comparison of two strings
bool operator != (const tstring& s1, const tstring& s2);
/// inequality comparison of a string with a C string
bool operator != (const tstring& s1, const char *s2);
/// inequality comparison of a C string with a string
bool operator != (const char *s1, const tstring& s2);
/// less-than comparison of two strings
bool operator < (const tstring& s1, const tstring& s2);
/// less-than comparison of a string with a C string
bool operator < (const tstring& s1, const char *s2);
/// less-than comparison of a C string with a string
bool operator < (const char *s1, const tstring& s2);
/// greater-than comparison of a string with a C string
bool operator > (const tstring& s1, const char *s2);
/// greater-than comparison of a C string with a string
bool operator > (const char *s1, const tstring& s2);
/// greater-than comparison of two strings
bool operator > (const tstring& s1, const tstring& s2);
/*@}*/
/**@name misc friends and nonmembers */
/*@{*/
/// split string s into pieces at any of the characters in c-str separator
/// and return the pieces as a vector
/// (allow_quoting / crop_space: presumably enable quoted pieces and
/// whitespace cropping of each piece -- confirm against the implementation)
tvector<tstring> split(const tstring& s, const char *separator,
bool allow_quoting=false,
bool crop_space=false);
/// join the strings in a into one string, reverse the effect of split
/// (separator is presumably inserted between adjacent elements -- confirm against the implementation)
tstring join(const tvector<tstring>& a, const tstring& separator);
/// try to preserve case from 'from' to 'to' and return altered 'to' with case from 'from'
tstring preserveCase(const tstring& from, const tstring& to);
/// return a copy of s whose case is changed as selected by _case:
/// tstring::UPPER, tstring::LOWER or tstring::CAPITALIZE;
/// every other value (e.g. tstring::NOT) yields an unmodified copy
inline tstring modifyCase(const tstring& s, int _case) {
    tstring result(s);
    if(_case == tstring::UPPER) {
        result.upper();
    } else if(_case == tstring::LOWER) {
        result.lower();
    } else if(_case == tstring::CAPITALIZE) {
        result.capitalize();
    }
    return result;
}
/// Create progress bar; all parameters are optional (message defaults to 0,
/// n and max to 0, width to 79)
/// NOTE(review): ownership/lifetime of the returned C string is not visible
/// from this header -- confirm against the implementation before storing it
const char *progressBar(const char *message = 0, unsigned int n = 0, unsigned int max = 0, int width = 79);
/// load text file named fname to array of strings
tvector<tstring> loadTextFile(const char *fname);
/// load text file from an already opened FILE to array of strings
tvector<tstring> loadTextFile(FILE *file);
/*@}*/
/*@}*/
#endif /* _ngw_tstring_h_ */