1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751
|
//
// aegis - project change supervisor
// Copyright (C) 2004-2008 Peter Miller
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see
// <http://www.gnu.org/licenses/>.
//
#ifndef COMMON_NSTRING_H
#define COMMON_NSTRING_H
#include <common/str.h>
class nstring_list; // forward
/**
* The nstring class is used to represent a reference counted narrow string
* with fast equality comparison.
*
* This is a narrow string class, using ordinary char as the internal
* character type. For wide strings, which use wchar_t as the internal
* character type, see the wstring class.
*/
class nstring
{
public:
/**
* The destructor.
*
* This destructor is <b>not</b> virtual, do not derive from this class.
*/
~nstring()
{
str_free(ref);
ref = 0;
}
/**
* The default constructor.
*/
nstring() :
ref(get_empty_ref())
{
}
/**
* The constructor.
*/
nstring(const char *arg) :
ref(arg ? str_from_c(arg) : get_empty_ref())
{
}
/**
* The constructor.
*/
nstring(const char *data, size_t len) :
ref(str_n_from_c(data, len))
{
}
/**
* The constructor.
*/
explicit
nstring(string_ty *arg) :
ref(arg ? str_copy(arg) : get_empty_ref())
{
}
/**
* The copy constructor.
*/
nstring(const nstring &arg) :
ref(str_copy(arg.ref))
{
}
/**
* The assignment operator.
*/
nstring &
operator=(const nstring &arg)
{
if (this != &arg)
{
str_free(ref);
ref = (arg.ref ? str_copy(arg.ref) : get_empty_ref());
}
return *this;
}
/**
* The c_str method is used to obtain a pointer to the underlying C
* string (guaranteed to the NUL terminated).
*/
const char *
c_str()
const
{
return ref->str_text;
}
bool
empty()
const
{
return (ref->str_length == 0);
}
size_t
size()
const
{
return ref->str_length;
}
size_t
length()
const
{
return ref->str_length;
}
/**
* \brief
* join two strings together
*
* The str_catenate function is used to join two strings togther to
* form a new string. The are joined in the order given.
*
* \param arg
* A string to be joined. Will not be modified.
*
* \return
* a pointer to a string in dynamic memory. Use str_free()
* when finished with. The contents of the structure pointed
* to <b>shall not</b> be altered.
*/
nstring
catenate(const nstring &arg)
const
{
string_ty *tmp = str_catenate(ref, arg.ref);
nstring result(tmp);
str_free(tmp);
return result;
}
nstring
operator+(const nstring &arg)
const
{
string_ty *tmp = str_catenate(ref, arg.ref);
nstring result(tmp);
str_free(tmp);
return result;
}
nstring &
operator+=(const nstring &arg)
{
if (!arg.empty())
{
string_ty *s = str_catenate(ref, arg.ref);
str_free(ref);
ref = s;
}
return *this;
}
/**
* \brief
* joing strings together
*
* The str_cat_three function is used to join three strings together
* to form a new string. The are joined in the order given.
*
* \param str2
* A string to be joined. Will not be modified.
* \param str3
* A string to be joined. Will not be modified.
*
* \return
* a pointer to a string in dynamic memory. Use str_free() when
* finished with. The contents of the structure pointed to
* <b>shall not</b> be altered.
*/
nstring cat_three(const nstring &str2, const nstring &str3) const;
/**
* \brief
* test a boolean
*
* The str_bool function is used to test the value of a string, as if
* it contained a number. If it doesn't contain a number, it is as if
* the strings was "1".
*
* \return
* False if the numeric value in the strings was zero, or the
* empty string. True if the numeric value in the string was
* non-zero, or the string was non-numeric.
*/
operator bool() const;
/**
* The logical netation operator.
* Returns the negation of the bool() operator.
*/
bool operator!() const;
/**
* \brief
* convert to upper case
*
* The str_upcase function is used to create a new string where the
* lower case characters in the input string are converted to upper
* case.
*
* \return
* a pointer to a string in dynamic memory. Use str_free()
* when finished with. The contents of the structure pointed
* to <b>shall not</b> be altered.
*/
nstring upcase() const;
/**
* \brief
* convert to lower case
*
* The str_downcase function is used to create a new string where the
* upper case characters in the input string are converted to lower
* case.
*
* \return
* a pointer to a string in dynamic memory. Use str_free()
* when finished with. The contents of the structure pointed
* to <b>shall not</b> be altered.
*/
nstring downcase() const;
/**
* \brief
* convert to title case
*
* The str_capitalize function is used to create a new string where the
* first letter or each word of the inopuyt string are upper case, and
* the remaining letters in each word are lower case. (Sometimes called
* Title Case.)
*
* \returns
* a pointer to a string in dynamic memory.
*/
nstring capitalize() const;
/**
* \brief
* extract a field
*
* The str_field function is used to extract the \a nth field, where
* each field is separated by the \a sep string.
*
* \param sep
* The string which separates each field.
* \param nth
* The number of the field to be extracted. Zero based.
* If too high, the emtry string is returned.
*
* \return
* a pointer to a string in dynamic memory. Use str_free() when
* finished with. The contents of the structure pointed to
* <b>shall not</b> be altered.
*/
nstring field(char sep, int nth) const;
/**
* \brief
* format text
*
* The str_format function is used to create a new string by interpreting
* the \a fmt string. All formats understood by the ANSI C printf(3)
* are understood by this function (but probably not your favorite
* proprietary extension). In addition the '%S' specifier expects a <i>
* string_ty * </i> argument.
*
* \param fmt
* The format string to be interpreted when constructing the
* return value.
*
* \return
* a pointer to a string in dynamic memory. Use str_free()
* when finished with. The contents of the structure pointed
* to <b>shall not</b> be altered.
*/
static nstring format(const char *fmt, ...) ATTR_PRINTF(1, 2);
/**
* \brief
* format text
*
* The str_vformat function is used to create a new string by
* interpreting the \a fmt string. All formats understood by the
* ANSI C printf(3) are understood by this function (but probably
* not your favorite proprietary extension). In addition the '%S'
* specifier expects a <i>string_ty *</i> argument.
*
* \param fmt
* The format string to be interpreted when constructing the
* return value.
* \param ap
* Where to obtain additional arguments required by the \a fmt string.
*
* \return
* a pointer to a string in dynamic memory. Use str_free()
* when finished with. The contents of the structure pointed
* to <b>shall not</b> be altered.
*/
static nstring vformat(const char *fmt, va_list ap);
/**
* \brief
* test string equality
*
* The str_equal function is used to test to see if two strings are
* exactly the same.
*
* \param arg
* A string to be compared. Will not be modified.
*
* \note
* Users shall always write code as if they did not know that a
* string equality test is a pointer equality test.
*
* \return
* Non-zero if the strings are equal,
* zero if the strings are unequal.
*/
bool
equal(const nstring &arg)
const
{
return (ref == arg.ref);
}
/**
* The equal-to operator.
*
* @param rhs
* The right hans side of the comparison.
*/
bool
operator==(const nstring &arg)
const
{
return (ref == arg.ref);
}
/**
* The not-equal-to operator.
*
* @param rhs
* The right hans side of the comparison.
*/
bool
operator!=(const nstring &arg)
const
{
return (ref != arg.ref);
}
/**
* The less-than operator.
*
* @param rhs
* The right hans side of the comparison.
*/
bool operator<(const nstring &arg) const;
/**
* The less-than-or-equal operator.
*
* @param rhs
* The right hans side of the comparison.
*/
bool operator<=(const nstring &arg) const;
/**
* The greater-than operator.
*
* @param rhs
* The right hans side of the comparison.
*/
bool operator>(const nstring &arg) const;
/**
* The greater-than-or-equal operator.
*
* @param rhs
* The right hans side of the comparison.
*/
bool operator>=(const nstring &arg) const;
/**
* \brief
* quote C meta-characters
*
* The quote_c method is used to create a new string which
* quotes the C meta-characters in the input string.
*/
nstring quote_c() const;
/**
* \brief
* quote shell meta-characters
*
* The str_quote_shell function is used to create a new string which
* quotes the shell meta-characters in the input string.
*
* \return
* a pointer to a string in dynamic memory. Use str_free()
* when finished with. The contents of the structure pointed
* to <b>shall not</b> be altered.
*/
nstring quote_shell() const;
/**
* \brief
* remove excess white space
*
* The trim method is used to remove white space from the beginning
* and end of the string, and replace all other runs of one or more
* white space characters with a single space.
*
* \return
* another string
*/
nstring trim() const;
/**
* \brief
* remove excess white space
*
* The trim_lines method is used to remove white space from the
* beginning and end of <i>lines</i> within the string, and replace
* all other runs of one or more white space characters with a
* single space.
*
* \return
* another string
*/
nstring trim_lines() const;
/**
* The trim_extension method is used to build a new string without
* the file extension. For example, the string "a/b.c" will return
* "a/b".
*/
nstring trim_extension() const;
/**
* The get_extension method is used to build a new string
* containing the file extension, if any, without the dot. For
* example, the string "a/b.c" will return "c".
*/
nstring get_extension() const;
/**
* \brief
* remove excess white space
*
* The snip method is used to remove white space from the beginning
* and end of the string. Interior white space is left unchanged.
*
* \return
* another string
*/
nstring snip() const;
/**
* \brief
* check is valid
*
* The str_validate function is used to confirm that the given
* string pointer, \a str, points to a valid string. Usually used
* for debugging, often in assert()s.
*
* \return
* Non-zero if valid, zero if invalid.
*/
bool
valid()
const
{
return str_validate(ref);
}
/**
* The get_ref method is used to extract the reference to the
* underlying reference counted string object. If the ref pointer
* is NULL a pointer to a common empty string object is returned.
*
* Once the sources cease using string_ty directly, this method
* will become private.
*/
string_ty *
get_ref()
const
{
return ref;
}
/**
* The starts_with method is ised to test whether this string
* starts with the given prefix.
*
* @param prefix
* The string to test for.
*/
bool starts_with(const nstring &prefix) const;
/**
* The ends_with method is ised to test whether this string
* ends with the given suffix.
*
* @param suffix
* The string to test for.
*/
bool ends_with(const nstring &suffix) const;
/**
* The ends_with_nocase method is ised to test whether this string
* ends with the given suffix. The comparison will be case
* insensitive.
*
* @param suffix
* The string to test for.
*/
bool ends_with_nocase(const nstring &suffix) const;
/**
* The gmatch function is used to match the string against a file
* globbing pattern.
*
* @pattern
* The pattern to try against the string.
* @returns
* bool; true if matches pattern, false if does not.
*/
bool gmatch(const char *pattern) const;
/**
* The gmatch function is used to match the string against a file
* globbing pattern.
*
* @pattern
* The pattern to try against the string.
* @returns
* bool; true if matches pattern, false if does not.
*/
bool gmatch(const nstring &pattern) const;
/**
* The gmatch function is used to match the string against a set of
* file globbing patterns.
*
* @patterns
* The patterns to try against the string.
* @returns
* bool; true if matches at least one pattern, false if does
* not match any pattern.
*/
bool gmatch(const nstring_list &pattern) const;
/**
* The identifier method is used to convert all non-C-identifier
* characters in the string to underscores. The intention is to
* create a valid C identifier from the string.
*/
nstring identifier() const;
/**
* The replace method may be used to alter a string by replacing
* one constant substring with another.
*
* @note
* The replacement is <b>not</b> done <i>in situ</i>. The original
* string is unaltered.
*
* @param lhs
* The substring to look for.
* @param rhs
* The substring to replace \a lhs if found.
* @param maximum
* The maximum number of times to perform the replacement.
* Defaults to "infinity".
* @returns
* A new string with the replacements made.
*/
nstring replace(const nstring &lhs, const nstring &rhs, int maximum = -1)
const;
/**
* The indexing operator is used to extract the nth character of a
* string. Indexes out of range will result in the NUL character
* ('\0') being returned.
*
* @param n
* The character to extract. Zero based.
* @returns
* The character requested, or NUL ('\0') if the index is out
* of range.
*/
char
operator[](size_t n)
const
{
return (n < size() ? ref->str_text[n] : '\0');
}
/**
* The clear method is used to delete to contents of the string,
* and replace it with the empty string/
*/
void clear();
/**
* The url_quote mwthod is used to form a string suitable for use
* within an HTML href="" string, or similar. This means that
* special characters and unprintable characters are replaced with
* %NN escape sequences.
*/
nstring url_quote() const;
/**
* The url_unquote method is used to form a normal string given an
* HTML href="" string, or similar. This means that %NN escape
* sequences are replaced with single bytes.
*/
nstring url_unquote() const;
/**
* The html_quote method is used to form a string suitable for use
* withing an HTML paragraph. This means that special characters
* and unprintable characters are replaced with &#DDD; escape
* sequences. Some special characters are also replaced, e.g. <
*
* \param para
* Whether or not to translate \n\n and \n as <p> and <br>.
* Defaults to false (no translation).
*/
nstring html_quote(bool para = false) const;
/**
* The html_unquote method is used to form a normal string given
* text from an HTML paragraph. This means that &#DDD; escape
* sequences are replaced by single bytes. Some special characters
* are also replaced, e.g. <
*
* \note
* unicode values (DDD > 255) are not understood.
*/
nstring html_unquote() const;
/**
* The to_long method attempts to turn a string into a long value.
* It returns zero on failure.
*/
long to_long() const;
/**
* The substring method may be used to extract a substring from this
* string.
*
* @param start
* The offset into the string where the substring starts. If
* negative, is measured from the end.
* @param nbytes
* The number of bytes to extract, if that many available. If
* negative, measured to the left (text not reversed).
* @returns
* a string, note that it could be less than nbytes long.
*/
nstring substring(long start, long nbytes) const;
/**
* The dirname method is used to extract the directory part of a
* filename. If there is no directory part, "." is returned.
*/
nstring dirname() const;
/**
* The first_dirname method is used to extract the first directory
* part of a filename. If there is no directory part, "." is
* returned.
*/
nstring first_dirname() const;
/**
* The basename method is used to generate a new string from
* this one with any leading directory components removed. If
* specified, also remove a trailing suffix.
*/
nstring basename(const nstring &suffix = "") const;
/**
* The get_hash method is used to access the has value of this
* key. While this is an internal implementation feature, it is
* often extremely handy to have a pre-calculated hash value for a
* string, and so it is made public.
*/
str_hash_ty get_hash() const { return ref->str_hash; }
private:
/**
* The ref instance variable is used to remember the location of
* the object common to all of the references.
* The is <i>never</i> the NULL pointer.
*/
string_ty *ref;
/**
* The get_empty_ref() class method is used to get a
* pointer to an underlying string object of length zero.
*/
static string_ty *get_empty_ref();
};
/**
  * The addition operator, for a C string on the left hand side.
  *
  * @param lhs
  *     The C string to appear first in the result.  It is converted
  *     to an nstring before being joined.
  * @param rhs
  *     The string to appear second in the result.
  * @returns
  *     the result of calling catenate on the converted \a lhs, with
  *     \a rhs as the argument.
  */
inline nstring
operator+(const char *lhs, const nstring &rhs)
{
    const nstring left(lhs);
    return left.catenate(rhs);
}
/**
  * The addition operator, for a C string on the right hand side.
  *
  * @param lhs
  *     The string to appear first in the result.
  * @param rhs
  *     The C string to appear second in the result.  It is converted
  *     to an nstring before being joined.
  * @returns
  *     the result of calling catenate on \a lhs, with the converted
  *     \a rhs as the argument.
  */
inline nstring
operator+(const nstring &lhs, const char *rhs)
{
    const nstring right(rhs);
    return lhs.catenate(right);
}
#endif // COMMON_NSTRING_H
|