1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
|
///////////////////////////////////////////////////////////////////////////////
/// \file boyer_moore.hpp
/// Contains the boyer-moore implementation. Note: this is *not* a general-
/// purpose boyer-moore implementation. It truncates the search string at
/// 256 characters, but it is sufficient for the needs of xpressive.
//
// Copyright 2008 Eric Niebler. Distributed under the Boost
// Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005
#define BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005
// MS compatible compilers support #pragma once
#if defined(_MSC_VER)
# pragma once
# pragma warning(push)
# pragma warning(disable : 4100) // unreferenced formal parameter
# pragma warning(disable : 4996)
#endif
#include <climits> // for UCHAR_MAX
#include <cstddef> // for std::ptrdiff_t
#include <utility> // for std::max
#include <vector>
#include <boost/mpl/bool.hpp>
#include <boost/noncopyable.hpp>
#include <boost/iterator/iterator_traits.hpp>
#include <boost/type_traits/is_convertible.hpp>
#include <boost/xpressive/detail/detail_fwd.hpp>
namespace boost { namespace xpressive { namespace detail
{
///////////////////////////////////////////////////////////////////////////////
// boyer_moore
//
template<typename BidiIter, typename Traits>
struct boyer_moore
: noncopyable
{
typedef typename iterator_value<BidiIter>::type char_type;
typedef Traits traits_type;
typedef has_fold_case<Traits> case_fold;
typedef typename Traits::string_type string_type;
// initialize the Boyer-Moore search data structure, using the
// search sub-sequence to prime the pump.
boyer_moore(char_type const *begin, char_type const *end, Traits const &tr, bool icase)
: begin_(begin)
, last_(begin)
, fold_()
, find_fun_(
icase
? (case_fold() ? &boyer_moore::find_nocase_fold_ : &boyer_moore::find_nocase_)
: &boyer_moore::find_
)
{
std::ptrdiff_t const uchar_max = UCHAR_MAX;
std::ptrdiff_t diff = std::distance(begin, end);
this->length_ = static_cast<unsigned char>((std::min)(diff, uchar_max));
std::fill_n(static_cast<unsigned char *>(this->offsets_), uchar_max + 1, this->length_);
--this->length_;
icase ? this->init_(tr, case_fold()) : this->init_(tr, mpl::false_());
}
BidiIter find(BidiIter begin, BidiIter end, Traits const &tr) const
{
return (this->*this->find_fun_)(begin, end, tr);
}
private:
void init_(Traits const &tr, mpl::false_)
{
for(unsigned char offset = this->length_; offset; --offset, ++this->last_)
{
this->offsets_[tr.hash(*this->last_)] = offset;
}
}
void init_(Traits const &tr, mpl::true_)
{
this->fold_.reserve(this->length_ + 1);
for(unsigned char offset = this->length_; offset; --offset, ++this->last_)
{
this->fold_.push_back(tr.fold_case(*this->last_));
for(typename string_type::const_iterator beg = this->fold_.back().begin(), end = this->fold_.back().end();
beg != end; ++beg)
{
this->offsets_[tr.hash(*beg)] = offset;
}
}
this->fold_.push_back(tr.fold_case(*this->last_));
}
// case-sensitive Boyer-Moore search
BidiIter find_(BidiIter begin, BidiIter end, Traits const &tr) const
{
typedef typename boost::iterator_difference<BidiIter>::type diff_type;
diff_type const endpos = std::distance(begin, end);
diff_type offset = static_cast<diff_type>(this->length_);
for(diff_type curpos = offset; curpos < endpos; curpos += offset)
{
std::advance(begin, offset);
char_type const *pat_tmp = this->last_;
BidiIter str_tmp = begin;
for(; tr.translate(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp)
{
if(pat_tmp == this->begin_)
{
return str_tmp;
}
}
offset = this->offsets_[tr.hash(tr.translate(*begin))];
}
return end;
}
// case-insensitive Boyer-Moore search
BidiIter find_nocase_(BidiIter begin, BidiIter end, Traits const &tr) const
{
typedef typename boost::iterator_difference<BidiIter>::type diff_type;
diff_type const endpos = std::distance(begin, end);
diff_type offset = static_cast<diff_type>(this->length_);
for(diff_type curpos = offset; curpos < endpos; curpos += offset)
{
std::advance(begin, offset);
char_type const *pat_tmp = this->last_;
BidiIter str_tmp = begin;
for(; tr.translate_nocase(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp)
{
if(pat_tmp == this->begin_)
{
return str_tmp;
}
}
offset = this->offsets_[tr.hash(tr.translate_nocase(*begin))];
}
return end;
}
// case-insensitive Boyer-Moore search with case-folding
BidiIter find_nocase_fold_(BidiIter begin, BidiIter end, Traits const &tr) const
{
typedef typename boost::iterator_difference<BidiIter>::type diff_type;
diff_type const endpos = std::distance(begin, end);
diff_type offset = static_cast<diff_type>(this->length_);
for(diff_type curpos = offset; curpos < endpos; curpos += offset)
{
std::advance(begin, offset);
string_type const *pat_tmp = &this->fold_.back();
BidiIter str_tmp = begin;
for(; pat_tmp->end() != std::find(pat_tmp->begin(), pat_tmp->end(), *str_tmp);
--pat_tmp, --str_tmp)
{
if(pat_tmp == &this->fold_.front())
{
return str_tmp;
}
}
offset = this->offsets_[tr.hash(*begin)];
}
return end;
}
private:
char_type const *begin_;
char_type const *last_;
std::vector<string_type> fold_;
BidiIter (boyer_moore::*const find_fun_)(BidiIter, BidiIter, Traits const &) const;
unsigned char length_;
unsigned char offsets_[UCHAR_MAX + 1];
};
}}} // namespace boost::xpressive::detail
#if defined(_MSC_VER)
# pragma warning(pop)
#endif
#endif
|