File: regex_token_iterator.html

package info (click to toggle)
boost 1.32.0-6
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 93,952 kB
  • ctags: 128,458
  • sloc: cpp: 492,477; xml: 52,125; python: 13,519; ansic: 13,013; sh: 1,773; yacc: 853; makefile: 526; perl: 418; lex: 110; csh: 6
file content (293 lines) | stat: -rw-r--r-- 17,127 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
   <head>
      <title>Boost.Regex: regex_token_iterator</title>
      <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
      <LINK href="../../../boost.css" type="text/css" rel="stylesheet"></head>
   <body>
      <P>
         <TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0">
            <TR>
               <td vAlign="top" width="300">
                  <h3><A href="../../../index.htm"><IMG height="86" alt="C++ Boost" src="../../../boost.png" width="277" border="0"></A></h3>
               </td>
               <TD width="353">
                  <H1 align="center">Boost.Regex</H1>
                  <H2 align="center">regex_token_iterator</H2>
               </TD>
               <td width="50">
                  <h3><A href="index.html"><IMG height="45" alt="Boost.Regex Index" src="uarrow.gif" width="43" border="0"></A></h3>
               </td>
            </TR>
         </TABLE>
      </P>
      <HR>
      <H3>Contents</H3>
      <dl class="index">
         <dt><A href="#synopsis">Synopsis</A></dt> <dt><a href="#description">Description</a></dt> <dt><A href="#examples">
                     Examples</A></dt></dl>
      <H3><A name="synopsis"></A>Synopsis</H3>
      <P>The template class <CODE>regex_token_iterator</CODE> is an iterator adapter; 
         that is to say it represents a new view of an existing iterator sequence, by 
         enumerating all the occurrences of a regular expression within that sequence, 
         and presenting one or more character sequences for each match found. Each 
         position enumerated by the iterator is a <A href="sub_match.html">sub_match</A> 
         object that represents what matched a particular sub-expression within the 
         regular expression. When class <CODE>regex_token_iterator</CODE> is used to 
         enumerate a single sub-expression with index -1, then the iterator performs 
         field splitting: that is to say it enumerates one character sequence for each 
         section of the character container sequence that does not match the regular 
         expression specified.</P>
      <PRE>
template &lt;class BidirectionalIterator, 
          class charT = iterator_traits&lt;BidirectionalIterator&gt;::value_type,
          class traits = regex_traits&lt;charT&gt;,
          class Allocator = allocator&lt;charT&gt; &gt;
class regex_token_iterator 
{
public:
   typedef          <A href="basic_regex.html">basic_regex</A>&lt;charT, traits, Allocator&gt;                   regex_type;
   typedef          <A href="sub_match.html">sub_match</A>                     
             &lt;BidirectionalIterator&gt;                                     value_type;
   typedef typename iterator_traits&lt;BidirectionalIterator&gt;::difference_type difference_type;
   typedef          const value_type*                                       pointer;
   typedef          const value_type&amp;                                       reference;
   typedef          std::forward_iterator_tag                               iterator_category;
   
   <A href="#c1">regex_token_iterator</A>();
   <A href="#c2">regex_token_iterator</A>(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re, 
                        int submatch = 0, <A href="match_flag_type.html">match_flag_type</A> m = match_default);
   <A href="#c3">regex_token_iterator</A>(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re, 
                        const std::vector&lt;int&gt;&amp; submatches, match_flag_type m = match_default);
   template &lt;std::size_t N&gt;
   <A href="#c4">regex_token_iterator</A>(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re, 
                        const int (&amp;submatches)[N], match_flag_type m = match_default);
   <A href="#c5">regex_token_iterator</A>(const regex_token_iterator&amp;);
   regex_token_iterator&amp; <A href="#o1">operator</A>=(const regex_token_iterator&amp;);
   bool <A href="#o2">operator</A>==(const regex_token_iterator&amp;)const;
   bool <A href="#o3">operator</A>!=(const regex_token_iterator&amp;)const;
   const value_type&amp; <A href="#o4">operator</A>*()const;
   const value_type* <A href="#o5">operator</A>-&gt;()const;
   regex_token_iterator&amp; <A href="#o6">operator</A>++();
   regex_token_iterator <A href="#o7">operator</A>++(int);
};

typedef regex_token_iterator&lt;const char*&gt;                   cregex_token_iterator;
typedef regex_token_iterator&lt;std::string::const_iterator&gt;   sregex_token_iterator;
#ifndef BOOST_NO_WREGEX
typedef regex_token_iterator&lt;const wchar_t*&gt;                wcregex_token_iterator;
typedef regex_token_iterator&lt;std::wstring::const_iterator&gt;  wsregex_token_iterator;
#endif
</PRE>
      <H3><A name="description"></A>Description</H3>
      <PRE><A name=c1></A>regex_token_iterator();</PRE>
      <P><B> Effects:</B> constructs an end of sequence iterator.</P>
      <PRE><A name=c2></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re, 
                     int submatch = 0, match_flag_type m = match_default);</PRE>
      <P><B> Preconditions: </B><CODE>!re.empty()</CODE>.&nbsp; Object re shall exist 
         for the lifetime of the iterator constructed from it.</P>
      <P><B> Effects:</B> constructs a regex_token_iterator that will enumerate one 
         string for each regular expression match of the expression <EM>re</EM> found 
         within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>.&nbsp; The 
         string enumerated is the&nbsp;sub-expression <EM>submatch </EM>for each match 
         found; if <EM>submatch </EM>is -1, then enumerates all the text sequences that 
         did not match the expression <EM>re </EM>(that is to say it performs field splitting).</P>
      <P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of 
         matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>), 
         or if the program runs out of stack space while matching the expression (if 
         Boost.regex is <A href="configuration.html">configured</A> in recursive mode), 
         or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
            configured</A> in non-recursive mode).</P>
      <PRE><A name=c3></A>regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re, 
                     const std::vector&lt;int&gt;&amp; submatches, match_flag_type m = match_default);</PRE>
      <P><B> Preconditions:</B> <CODE>submatches.size() &amp;&amp; !re.empty()</CODE>.&nbsp; 
         Object re shall exist for the lifetime of the iterator constructed from it.</P>
      <P><B> Effects:</B> constructs a regex_token_iterator that will enumerate <EM>submatches.size()</EM>
         strings for each regular expression match of the expression <EM>re</EM> found 
         within the sequence <EM>[a,b)</EM>, using match flags <EM>m</EM>.&nbsp; For 
         each match found one string will be enumerated&nbsp;for each sub-expression 
         index&nbsp;contained within <EM>submatches </EM>vector; if <EM>submatches[0] </EM>
         is -1, then the first string enumerated for each match will be all of the text 
         from end of the last match to the start of the current match, in addition there 
         will be one extra string enumerated when no more matches can be found: from the 
         end of the last match found, to the end of the underlying sequence.</P>
      <P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of 
         matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>), 
         or if the program runs out of stack space while matching the expression (if 
         Boost.regex is <A href="configuration.html">configured</A> in recursive mode), 
         or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
            configured</A> in non-recursive mode).</P>
      <PRE><A name=c4></A>template &lt;std::size_t R&gt;
regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type&amp; re, 
                     const int (&amp;submatches)[R], match_flag_type m = match_default);</PRE>
      <P><B> Preconditions: </B><CODE>!re.empty()</CODE>.&nbsp; Object re shall exist 
         for the lifetime of the iterator constructed from it.</P>
      <P><STRONG>Effects:</STRONG> constructs a regex_token_iterator that will 
         enumerate&nbsp;<EM>R</EM> strings for each regular expression match of the 
         expression <EM>re</EM> found within the sequence <EM>[a,b)</EM>, using match 
         flags <EM>m</EM>.&nbsp; For each match found one string will be 
         enumerated&nbsp;for each sub-expression index&nbsp;contained within the <EM>submatches
         </EM>array; if <EM>submatches[0] </EM>is -1, then the first string enumerated 
         for each match will be all of the text from end of the last match to the start 
         of the current match, in addition there will be one extra string enumerated 
         when no more matches can be found: from the end of the last match found, to the 
         end of the underlying sequence.</P>
      <P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of 
         matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>), 
         or if the program runs out of stack space while matching the expression (if 
         Boost.regex is <A href="configuration.html">configured</A> in recursive mode), 
         or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
            configured</A> in non-recursive mode).</P>
      <PRE><A name=c5></A>regex_token_iterator(const regex_token_iterator&amp; that);</PRE>
      <P><B> Effects: </B>constructs a copy of <CODE>that</CODE>.</P>
      <P><B> Postconditions:</B> <CODE>*this == that</CODE>.</P>
      <PRE><A name=o1></A>regex_token_iterator&amp; operator=(const regex_token_iterator&amp; that);</PRE>
      <P><B> Effects: </B>sets <CODE>*this</CODE> to be equal to&nbsp;<CODE>that</CODE>.</P>
      <P><B> Postconditions:</B> <CODE>*this == that</CODE>.</P>
      <PRE><A name=o2></A>bool operator==(const regex_token_iterator&amp;)const;</PRE>
      <P>
         <B>Effects: </B>returns true if *this is the same position as that.</P>
      <PRE><A name=o3></A>bool operator!=(const regex_token_iterator&amp;)const;</PRE>
      <P>
         <B>Effects: </B>returns <CODE>!(*this == that)</CODE>.</P>
      <PRE><A name=o4></A>const value_type&amp; operator*()const;</PRE>
      <P>
         <B>Effects: </B>returns the current character sequence being enumerated.</P>
      <PRE><A name=o5></A>const value_type* operator-&gt;()const;</PRE>
      <P>
         <B>Effects: </B>returns <CODE>&amp;(*this)</CODE>.</P>
      <PRE><A name=o6></A>regex_token_iterator&amp; operator++();</PRE>
      <P>
         <B>Effects: </B>Moves on to the next character sequence to be enumerated.</P>
      <P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of 
         matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>), 
         or if the program runs out of stack space while matching the expression (if 
         Boost.regex is <A href="configuration.html">configured</A> in recursive mode), 
         or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html">
            configured</A> in non-recursive mode).</P>
      <P>
         <B>Returns:</B><CODE> *this</CODE>.</P>
      <PRE><A name=o7></A>regex_token_iterator operator++(int);</PRE>
      <P><B> Effects:</B> constructs a copy <CODE>result</CODE> of <CODE>*this</CODE>, 
         then calls <CODE>++(*this)</CODE>.</P>
      <P><B> Returns:</B> <CODE>result</CODE>.</P>
      <H3><A name="examples"></A>Examples</H3>
      <P>The following <A href="../example/snippets/regex_token_iterator_eg_1.cpp">example</A>
         takes a string and splits it into a series of tokens:</P>
      <pre>
<FONT color=#008040>#include &lt;iostream&gt;</FONT>
<FONT color=#008040>#include &lt;boost/regex.hpp&gt;</FONT>

<B>using</B> <B>namespace</B> std;

<B>int</B> main(<B>int</B> argc)
{
   string s;
   <B>do</B>{
      <B>if</B>(argc == <FONT color=#0000a0>1</FONT>)
      {
         cout &lt;&lt; <FONT color=#0000ff>"Enter text to split (or \"quit\" to exit): "</FONT>;
         getline(cin, s);
         <B>if</B>(s == <FONT color=#0000ff>"quit"</FONT>) <B>break</B>;
      }
      <B>else</B>
         s = <FONT color=#0000ff>"This is a string of tokens"</FONT>;

      boost::regex re(<FONT color=#0000ff>"\\s+"</FONT>);
      boost::sregex_token_iterator i(s.begin(), s.end(), re, -<FONT color=#0000a0>1</FONT>);
      boost::sregex_token_iterator j;

      <B>unsigned</B> count = <FONT color=#0000a0>0</FONT>;
      <B>while</B>(i != j)
      {
         cout &lt;&lt; *i++ &lt;&lt; endl;
         count++;
      }
      cout &lt;&lt; <FONT color=#0000ff>"There were "</FONT> &lt;&lt; count &lt;&lt; <FONT color=#0000ff>" tokens found."</FONT> &lt;&lt; endl;

   }<B>while</B>(argc == <FONT color=#0000a0>1</FONT>);
   <B>return</B> <FONT color=#0000a0>0</FONT>;
}

</pre>
      <P>The following <A href="../example/snippets/regex_token_iterator_eg_2.cpp">example</A>
         takes an HTML file and outputs a list of all the linked files:</P>
      <pre>
<FONT color=#008040>#include &lt;fstream&gt;</FONT>
<FONT color=#008040>#include &lt;iostream&gt;</FONT>
<FONT color=#008040>#include &lt;iterator&gt;</FONT>
<FONT color=#008040>#include &lt;boost/regex.hpp&gt;</FONT>

boost::regex e(<FONT color=#0000ff>"&lt;\\s*A\\s+[^&gt;]*href\\s*=\\s*\"([^\"]*)\""</FONT>,
               boost::regex::normal | boost::regbase::icase);

<B>void</B> load_file(std::string&amp; s, std::istream&amp; is)
{
   s.erase();
   <I><FONT color=#000080>//</FONT></I>
   <I><FONT color=#000080>// attempt to grow string buffer to match file size,</FONT></I>
   <I><FONT color=#000080>// this doesn't always work...</FONT></I>
   s.reserve(is.rdbuf()-&gt;in_avail());
   <B>char</B> c;
   <B>while</B>(is.get(c))
   {
      <I><FONT color=#000080>// use logarithmic growth strategy, in case</FONT></I>
      <I><FONT color=#000080>// in_avail (above) returned zero:</FONT></I>
      <B>if</B>(s.capacity() == s.size())
         s.reserve(s.capacity() * <FONT color=#0000a0>3</FONT>);
      s.append(<FONT color=#0000a0>1</FONT>, c);
   }
}

<B>int</B> main(<B>int</B> argc, <B>char</B>** argv)
{
   std::string s;
   <B>int</B> i;
   <B>for</B>(i = <FONT color=#0000a0>1</FONT>; i &lt; argc; ++i)
   {
      std::cout &lt;&lt; <FONT color=#0000ff>"Findings URL's in "</FONT> &lt;&lt; argv[i] &lt;&lt; <FONT color=#0000ff>":"</FONT> &lt;&lt; std::endl;
      s.erase();
      std::ifstream is(argv[i]);
      load_file(s, is);
      boost::sregex_token_iterator i(s.begin(), s.end(), e, <FONT color=#0000a0>1</FONT>);
      boost::sregex_token_iterator j;
      <B>while</B>(i != j)
      {
         std::cout &lt;&lt; *i++ &lt;&lt; std::endl;
      }
   }
   <I><FONT color=#000080>//</FONT></I>
   <I><FONT color=#000080>// alternative method:</FONT></I>
   <I><FONT color=#000080>// test the array-literal constructor, and split out the whole</FONT></I>
   <I><FONT color=#000080>// match as well as $1....</FONT></I>
   <I><FONT color=#000080>//</FONT></I>
   <B>for</B>(i = <FONT color=#0000a0>1</FONT>; i &lt; argc; ++i)
   {
      std::cout &lt;&lt; <FONT color=#0000ff>"Findings URL's in "</FONT> &lt;&lt; argv[i] &lt;&lt; <FONT color=#0000ff>":"</FONT> &lt;&lt; std::endl;
      s.erase();
      std::ifstream is(argv[i]);
      load_file(s, is);
      <B>const</B> <B>int</B> subs[] = {<FONT color=#0000a0>1</FONT>, <FONT color=#0000a0>0</FONT>,};
      boost::sregex_token_iterator i(s.begin(), s.end(), e, subs);
      boost::sregex_token_iterator j;
      <B>while</B>(i != j)
      {
         std::cout &lt;&lt; *i++ &lt;&lt; std::endl;
      }
   }

   <B>return</B> <FONT color=#0000a0>0</FONT>;
}
</pre>
      <HR>
      <p>Revised 
         <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan --> 
         24 Oct 2003 
         <!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
      <p><i> Copyright John Maddock&nbsp;1998- 
            <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->  2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
      <P><I>Use, modification and distribution are subject to the Boost Software License, 
            Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
            or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
   </body>
</html>