1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
|
/*
*
* Copyright (c) 2004
* John Maddock
*
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*
*/
/*
* LOCATION: see http://www.boost.org for most recent version.
* FILE mfc_example.cpp
* VERSION see <boost/version.hpp>
* DESCRIPTION: examples of using Boost.Regex with MFC and ATL string types.
*/
#include <boost/regex/config.hpp>
#ifdef BOOST_HAS_ICU
#include <boost/regex/icu.hpp>
#include <iostream>
#include <assert.h>
//
// Find out if *password* meets our password requirements,
// as defined by the regular expression *requirements*.
//
bool is_valid_password(const U_NAMESPACE_QUALIFIER UnicodeString& password, const U_NAMESPACE_QUALIFIER UnicodeString& requirements)
{
return boost::u32regex_match(password, boost::make_u32regex(requirements));
}
//
// Extract filename part of a path from a UTF-8 encoded std::string and return the result
// as another std::string:
//
std::string get_filename(const std::string& path)
{
boost::u32regex r = boost::make_u32regex("(?:\\A|.*\\\\)([^\\\\]+)");
boost::smatch what;
if(boost::u32regex_match(path, what, r))
{
// extract $1 as a std::string:
return what.str(1);
}
else
{
throw std::runtime_error("Invalid pathname");
}
}
U_NAMESPACE_QUALIFIER UnicodeString extract_greek(const U_NAMESPACE_QUALIFIER UnicodeString& text)
{
// searches through some UTF-16 encoded text for a block encoded in Greek,
// this expression is imperfect, but the best we can do for now - searching
// for specific scripts is actually pretty hard to do right.
boost::u32regex r = boost::make_u32regex(L"[\\x{370}-\\x{3FF}](?:[^[:L*:]]|[\\x{370}-\\x{3FF}])*");
boost::u16match what;
if(boost::u32regex_search(text, what, r))
{
// extract $0 as a UnicodeString:
return U_NAMESPACE_QUALIFIER UnicodeString(what[0].first, what.length(0));
}
else
{
throw std::runtime_error("No Greek found!");
}
}
void enumerate_currencies(const std::string& text)
{
// enumerate and print all the currency symbols, along
// with any associated numeric values:
const char* re =
"([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
"([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
"(?(1)"
"|(?(2)"
"[[:Cf:][:Cc:][:Z*:]]*"
")"
"[[:Sc:]]"
")";
boost::u32regex r = boost::make_u32regex(re);
boost::u32regex_iterator<std::string::const_iterator> i(boost::make_u32regex_iterator(text, r)), j;
while(i != j)
{
std::cout << (*i)[0] << std::endl;
++i;
}
}
void enumerate_currencies2(const std::string& text)
{
// enumerate and print all the currency symbols, along
// with any associated numeric values:
const char* re =
"([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
"([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
"(?(1)"
"|(?(2)"
"[[:Cf:][:Cc:][:Z*:]]*"
")"
"[[:Sc:]]"
")";
boost::u32regex r = boost::make_u32regex(re);
boost::u32regex_token_iterator<std::string::const_iterator>
i(boost::make_u32regex_token_iterator(text, r, 1)), j;
while(i != j)
{
std::cout << *i << std::endl;
++i;
}
}
//
// Take a credit card number as a string of digits,
// and reformat it as a human readable string with "-"
// separating each group of four digit;,
// note that we're mixing a UTF-32 regex, with a UTF-16
// string and a UTF-8 format specifier, and it still all
// just works:
//
const boost::u32regex e = boost::make_u32regex("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
const char* human_format = "$1-$2-$3-$4";
U_NAMESPACE_QUALIFIER UnicodeString human_readable_card_number(const U_NAMESPACE_QUALIFIER UnicodeString& s)
{
return boost::u32regex_replace(s, e, human_format);
}
int main()
{
// password checks using u32regex_match:
U_NAMESPACE_QUALIFIER UnicodeString pwd = "abcDEF---";
U_NAMESPACE_QUALIFIER UnicodeString pwd_check = "(?=.*[[:lower:]])(?=.*[[:upper:]])(?=.*[[:punct:]]).{6,}";
bool b = is_valid_password(pwd, pwd_check);
assert(b);
pwd = "abcD-";
b = is_valid_password(pwd, pwd_check);
assert(!b);
// filename extraction with u32regex_match:
std::string file = "abc.hpp";
file = get_filename(file);
assert(file == "abc.hpp");
file = "c:\\a\\b\\c\\d.h";
file = get_filename(file);
assert(file == "d.h");
// Greek text extraction with u32regex_search:
const UChar t[] = {
'S', 'o', 'm', 'e', ' ', 'w', 'h', 'e', 'r', 'e', ' ', 'i', 'n', 0x0391, 0x039D, 0x0395, 0x0398, 0x0391, 0
};
const UChar g[] = {
0x0391, 0x039D, 0x0395, 0x0398, 0x0391, 0
};
U_NAMESPACE_QUALIFIER UnicodeString text = t;
U_NAMESPACE_QUALIFIER UnicodeString greek = extract_greek(text);
assert(greek == g);
// extract currency symbols with associated value, use iterator interface:
std::string text2 = " $100.23 or \xC2\xA3""198.12 "; // \xC2\xA3 is the pound sign encoded in UTF-8
enumerate_currencies(text2);
enumerate_currencies2(text2);
U_NAMESPACE_QUALIFIER UnicodeString credit_card_number = "1234567887654321";
credit_card_number = human_readable_card_number(credit_card_number);
assert(credit_card_number == "1234-5678-8765-4321");
return 0;
}
#else
#include <iostream>
int main()
{
std::cout << "<NOTE>ICU support not enabled, feature unavailable</NOTE>";
return 0;
}
#endif
|