1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413
|
//////////////////////////////////////////////////////////////////////////////
/// \file c_regex_traits.hpp
/// Contains the definition of the c_regex_traits\<\> template, which is a
/// wrapper for the C locale functions that can be used to customize the
/// behavior of static and dynamic regexes.
//
// Copyright 2008 Eric Niebler. Distributed under the Boost
// Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_XPRESSIVE_TRAITS_C_REGEX_TRAITS_HPP_EAN_10_04_2005
#define BOOST_XPRESSIVE_TRAITS_C_REGEX_TRAITS_HPP_EAN_10_04_2005
// MS compatible compilers support #pragma once
#if defined(_MSC_VER)
# pragma once
#endif
#include <cstdlib>
#include <boost/config.hpp>
#include <boost/assert.hpp>
#include <boost/xpressive/traits/detail/c_ctype.hpp>
namespace boost { namespace xpressive
{
namespace detail
{
///////////////////////////////////////////////////////////////////////////////
// empty_locale
// Member-less tag type. c_regex_traits<> uses it as its locale_type, so a
// traits object carries no per-instance locale state.
struct empty_locale {};
///////////////////////////////////////////////////////////////////////////////
// c_regex_traits_base
// Primary template. It is selected for character types that match neither
// of the partial specializations below (one-byte types and wchar_t).
// Its only member is a protected imbue() hook that does nothing.
template<typename Char, std::size_t SizeOfChar = sizeof(Char)>
struct c_regex_traits_base
{
protected:
    // No-op: the traits argument is accepted and ignored.
    template<typename Traits>
    void imbue(Traits const & /*tr*/)
    {}
};

// Partial specialization for character types whose sizeof is 1.
template<typename Char>
struct c_regex_traits_base<Char, 1>
{
protected:
    // No-op: the traits argument is accepted and ignored.
    template<typename Traits>
    static void imbue(Traits const &)
    {}
};

#ifndef BOOST_XPRESSIVE_NO_WREGEX
// Partial specialization for wchar_t, whatever its size is on this platform.
// Compiled out when wide-character regex support is disabled.
template<std::size_t SizeOfChar>
struct c_regex_traits_base<wchar_t, SizeOfChar>
{
protected:
    // No-op: the traits argument is accepted and ignored.
    template<typename Traits>
    static void imbue(Traits const &)
    {}
};
#endif
// Case-conversion helpers built on the C library. The generic templates are
// only declared here; callers are served by the explicit specializations
// that follow.
template<typename Char>
Char c_tolower(Char);

template<typename Char>
Char c_toupper(Char);

// char -> lower case. The character is first converted to unsigned char
// before being handed to tolower(), and the int result is cast back.
template<>
inline char c_tolower(char ch)
{
    using namespace std;
    unsigned char const byte = static_cast<unsigned char>(ch);
    int const lowered = tolower(byte);
    return static_cast<char>(lowered);
}

// char -> upper case; same conversion steps as c_tolower<char>.
template<>
inline char c_toupper(char ch)
{
    using namespace std;
    unsigned char const byte = static_cast<unsigned char>(ch);
    int const raised = toupper(byte);
    return static_cast<char>(raised);
}

#ifndef BOOST_XPRESSIVE_NO_WREGEX
// wchar_t -> lower case via towlower().
template<>
inline wchar_t c_tolower(wchar_t ch)
{
    using namespace std;
    return static_cast<wchar_t>(towlower(ch));
}

// wchar_t -> upper case via towupper().
template<>
inline wchar_t c_toupper(wchar_t ch)
{
    using namespace std;
    return static_cast<wchar_t>(towupper(ch));
}
#endif
} // namespace detail
///////////////////////////////////////////////////////////////////////////////
// regex_traits_version_1_tag
//
// Forward declaration only; no definition of this tag is visible in this file.
// NOTE(review): c_regex_traits<> below names regex_traits_version_2_tag as its
// version_tag, so this version-1 tag looks unused here - confirm before removing.
struct regex_traits_version_1_tag;
///////////////////////////////////////////////////////////////////////////////
// c_regex_traits
//
/// \brief Encapsulates the standard C locale functions for use by the
/// \c basic_regex\<\> class template.
template<typename Char>
struct c_regex_traits
  : detail::c_regex_traits_base<Char>
{
    typedef Char char_type;
    typedef std::basic_string<char_type> string_type;
    typedef detail::empty_locale locale_type;
    typedef typename detail::char_class_impl<Char>::char_class_type char_class_type;
    typedef regex_traits_version_2_tag version_tag;
    typedef detail::c_regex_traits_base<Char> base_type;

    /// Initialize a c_regex_traits object to use the global C locale.
    ///
    /// \param loc An (empty) locale object; it is forwarded to imbue().
    c_regex_traits(locale_type const &loc = locale_type())
      : base_type()
    {
        this->imbue(loc);
    }

    /// Checks two c_regex_traits objects for equality
    ///
    /// \return true.
    bool operator ==(c_regex_traits<char_type> const &) const
    {
        return true;
    }

    /// Checks two c_regex_traits objects for inequality
    ///
    /// \return false.
    bool operator !=(c_regex_traits<char_type> const &) const
    {
        return false;
    }

    /// Convert a char to a Char
    ///
    /// \param ch The source character.
    /// \return ch if Char is char, std::btowc(ch) if Char is wchar_t.
    static char_type widen(char ch);

    /// Returns a hash value for a Char in the range [0, UCHAR_MAX]
    ///
    /// \param ch The source character.
    /// \return a value between 0 and UCHAR_MAX, inclusive.
    static unsigned char hash(char_type ch)
    {
        return static_cast<unsigned char>(std::char_traits<Char>::to_int_type(ch));
    }

    /// No-op
    ///
    /// \param ch The source character.
    /// \return ch
    static char_type translate(char_type ch)
    {
        return ch;
    }

    /// Converts a character to lower-case using the current global C locale.
    ///
    /// \param ch The source character.
    /// \return std::tolower(ch) if Char is char, std::towlower(ch) if Char is wchar_t.
    static char_type translate_nocase(char_type ch)
    {
        return detail::c_tolower(ch);
    }

    /// Converts a character to lower-case using the current global C locale.
    ///
    /// \param ch The source character.
    /// \return std::tolower(ch) if Char is char, std::towlower(ch) if Char is wchar_t.
    static char_type tolower(char_type ch)
    {
        return detail::c_tolower(ch);
    }

    /// Converts a character to upper-case using the current global C locale.
    ///
    /// \param ch The source character.
    /// \return std::toupper(ch) if Char is char, std::towupper(ch) if Char is wchar_t.
    static char_type toupper(char_type ch)
    {
        return detail::c_toupper(ch);
    }

    /// Returns a \c string_type containing all the characters that compare equal
    /// disregarding case to the one passed in. This function can only be called
    /// if <tt>has_fold_case\<c_regex_traits\<Char\> \>::value</tt> is \c true.
    ///
    /// \param ch The source character.
    /// \return \c string_type containing all chars which are equal to \c ch when disregarding
    ///     case
    //typedef array<char_type, 2> fold_case_type;
    string_type fold_case(char_type ch) const
    {
        BOOST_MPL_ASSERT((is_same<char_type, char>));
        // Null-terminated buffer: { lower-case form, upper-case form, terminator }.
        char_type ntcs[] = {
            detail::c_tolower(ch)
          , detail::c_toupper(ch)
          , 0
        };
        // When both forms coincide, cut the string after the first character
        // so the result does not list the same character twice.
        if(ntcs[1] == ntcs[0])
            ntcs[1] = 0;
        return string_type(ntcs);
    }

    /// Checks to see if a character is within a character range.
    ///
    /// \param first The bottom of the range, inclusive.
    /// \param last The top of the range, inclusive.
    /// \param ch The source character.
    /// \return first <= ch && ch <= last.
    static bool in_range(char_type first, char_type last, char_type ch)
    {
        return first <= ch && ch <= last;
    }

    /// Checks to see if a character is within a character range, regardless of case.
    ///
    /// \param first The bottom of the range, inclusive.
    /// \param last The top of the range, inclusive.
    /// \param ch The source character.
    /// \return in_range(first, last, ch) || in_range(first, last, tolower(ch)) || in_range(first,
    ///     last, toupper(ch))
    /// \attention The default implementation doesn't do proper Unicode
    ///     case folding, but this is the best we can do with the standard
    ///     C locale functions.
    static bool in_range_nocase(char_type first, char_type last, char_type ch)
    {
        return c_regex_traits::in_range(first, last, ch)
            || c_regex_traits::in_range(first, last, detail::c_tolower(ch))
            || c_regex_traits::in_range(first, last, detail::c_toupper(ch));
    }

    /// Returns a sort key for the character sequence designated by the iterator range [F1, F2)
    /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2)
    /// then v.transform(G1, G2) < v.transform(H1, H2).
    ///
    /// \attention Not currently used. Not implemented: asserts in debug builds and
    ///     returns an empty string otherwise.
    template<typename FwdIter>
    static string_type transform(FwdIter begin, FwdIter end)
    {
        BOOST_ASSERT(false); // BUGBUG implement me
        // Always return a value: flowing off the end of a non-void function
        // is undefined behaviour when the assertion is compiled out.
        return string_type();
    }

    /// Returns a sort key for the character sequence designated by the iterator range [F1, F2)
    /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2)
    /// when character case is not considered then
    /// v.transform_primary(G1, G2) < v.transform_primary(H1, H2).
    ///
    /// \attention Not currently used. Not implemented: asserts in debug builds and
    ///     returns an empty string otherwise.
    template<typename FwdIter>
    static string_type transform_primary(FwdIter begin, FwdIter end)
    {
        BOOST_ASSERT(false); // BUGBUG implement me
        // Always return a value; see transform().
        return string_type();
    }

    /// Returns a sequence of characters that represents the collating element
    /// consisting of the character sequence designated by the iterator range [F1, F2).
    /// Returns an empty string if the character sequence is not a valid collating element.
    ///
    /// \attention Not currently used. Not implemented: asserts in debug builds and
    ///     returns an empty string otherwise.
    template<typename FwdIter>
    static string_type lookup_collatename(FwdIter begin, FwdIter end)
    {
        BOOST_ASSERT(false); // BUGBUG implement me
        // Always return a value; an empty string is also the documented
        // "not a valid collating element" result.
        return string_type();
    }

    /// For the character class name represented by the specified character sequence,
    /// return the corresponding bitmask representation.
    ///
    /// \param begin A forward iterator to the start of the character sequence representing
    ///     the name of the character class.
    /// \param end The end of the character sequence.
    /// \param icase Specifies whether the returned bitmask should represent the case-insensitive
    ///     version of the character class.
    /// \return A bitmask representing the character class.
    template<typename FwdIter>
    static char_class_type lookup_classname(FwdIter begin, FwdIter end, bool icase)
    {
        return detail::char_class_impl<char_type>::lookup_classname(begin, end, icase);
    }

    /// Tests a character against a character class bitmask.
    ///
    /// \param ch The character to test.
    /// \param mask The character class bitmask against which to test.
    /// \pre mask is a bitmask returned by lookup_classname, or is several such masks bit-or'ed
    ///     together.
    /// \return true if the character is a member of any of the specified character classes, false
    ///     otherwise.
    static bool isctype(char_type ch, char_class_type mask)
    {
        return detail::char_class_impl<char_type>::isctype(ch, mask);
    }

    /// Convert a digit character into the integer it represents.
    ///
    /// \param ch The digit character.
    /// \param radix The radix to use for the conversion.
    /// \pre radix is one of 8, 10, or 16.
    /// \return -1 if ch is not a digit character, the integer value of the character otherwise. If
    ///     char_type is char, std::strtol is used for the conversion. If char_type is wchar_t,
    ///     std::wcstol is used.
    static int value(char_type ch, int radix);

    /// No-op
    ///
    /// Calls the base class imbue() hook with this object and hands the
    /// argument back unchanged.
    ///
    /// \param loc The locale object to "install".
    /// \return loc
    locale_type imbue(locale_type loc)
    {
        this->base_type::imbue(*this);
        return loc;
    }

    /// No-op
    ///
    /// \return A default-constructed locale_type.
    static locale_type getloc()
    {
        return locale_type();
    }
};
///////////////////////////////////////////////////////////////////////////////
// c_regex_traits<>::widen specializations
/// INTERNAL ONLY
template<>
inline char c_regex_traits<char>::widen(char ch)
{
    // Narrow traits: the input already has the target character type.
    return ch;
}
#ifndef BOOST_XPRESSIVE_NO_WREGEX
/// INTERNAL ONLY
template<>
inline wchar_t c_regex_traits<wchar_t>::widen(char ch)
{
    using namespace std;
    // btowc() expects either EOF or a value representable as unsigned char.
    // Passing a plain char directly sign-extends it where char is signed, so
    // a byte such as '\xFF' could arrive as a negative int (commonly equal to
    // EOF) and be reported as WEOF. Convert through unsigned char first.
    return static_cast<wchar_t>(btowc(static_cast<unsigned char>(ch)));
}
#endif
///////////////////////////////////////////////////////////////////////////////
// c_regex_traits<>::hash specializations
/// INTERNAL ONLY
template<>
inline unsigned char c_regex_traits<char>::hash(char ch)
{
    // The hash of a narrow character is the character itself, viewed as
    // an unsigned char.
    unsigned char const bucket = static_cast<unsigned char>(ch);
    return bucket;
}
#ifndef BOOST_XPRESSIVE_NO_WREGEX
/// INTERNAL ONLY
template<>
inline unsigned char c_regex_traits<wchar_t>::hash(wchar_t ch)
{
    // The wide character is converted (narrowed) to unsigned char.
    unsigned char const bucket = static_cast<unsigned char>(ch);
    return bucket;
}
#endif
///////////////////////////////////////////////////////////////////////////////
// c_regex_traits<>::value specializations
/// INTERNAL ONLY
template<>
inline int c_regex_traits<char>::value(char ch, int radix)
{
    using namespace std;
    BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix);

    // Wrap the character in a one-character, null-terminated string and let
    // strtol() parse it in the requested radix.
    char text[2] = { ch, '\0' };
    char *stop = 0;
    int const parsed = strtol(text, &stop, radix);

    // strtol() leaves the end pointer at the start when nothing was consumed,
    // which means ch is not a digit in this radix.
    if(text == stop)
    {
        return -1;
    }
    return parsed;
}
#ifndef BOOST_XPRESSIVE_NO_WREGEX
/// INTERNAL ONLY
template<>
inline int c_regex_traits<wchar_t>::value(wchar_t ch, int radix)
{
    using namespace std;
    BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix);

    // Same approach as the narrow version, using wcstol() on a
    // one-character wide string.
    wchar_t text[2] = { ch, L'\0' };
    wchar_t *stop = 0;
    int const parsed = wcstol(text, &stop, radix);

    // Nothing consumed means ch is not a digit in this radix.
    if(text == stop)
    {
        return -1;
    }
    return parsed;
}
#endif
// The narrow-character C traits provide a fold_case() member function;
// advertise that by making this trait derive from mpl::true_.
template<>
struct has_fold_case<c_regex_traits<char> >
  : mpl::true_
{};
}}
#endif
|