// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//  This example is equivalent to the following lex program:
/*
//[wcp_flex_version
    %{
        int c = 0, w = 0, l = 0;
    %}
    word   [^ \t\n]+
    eol    \n
    %%
    {word} { ++w; c += yyleng; }
    {eol}  { ++c; ++l; }
    .      { ++c; }
    %%
    main()
    {
        yylex();
        printf("%d %d %d\n", l, w, c);
    }
//]
*/
//  Its purpose is to replicate the word-count functionality of the UNIX 'wc'
//  command: it prints the number of lines, words, and characters in a file.
//
//  The example additionally demonstrates how to use the add_pattern(...)(...)
//  syntax to define lexer patterns. These patterns are essentially parameter-
//  less 'macros' for regular expressions, which helps to simplify their
//  definition.
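//
//  As an illustration (this snippet is not part of the example below, and the
//  pattern names DIGIT and NUMBER are made up), a pattern may reference other,
//  previously defined patterns, so a complex regular expression can be built
//  up step by step inside the token definition class:
//
//      this->self.add_pattern
//          ("DIGIT", "[0-9]")
//          ("NUMBER", "{DIGIT}+")   // NUMBER reuses the DIGIT pattern
//      ;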
// #define BOOST_SPIRIT_LEXERTL_DEBUG
#define BOOST_VARIANT_MINIMIZE_SIZE
#include <boost/config/warning_disable.hpp>
//[wcp_includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
//]
#include <iostream>
#include <string>
#include "example.hpp"
//[wcp_namespaces
using namespace boost::spirit;
using namespace boost::spirit::ascii;
//]
///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
// lexer type.
///////////////////////////////////////////////////////////////////////////////
//[wcp_token_ids
enum tokenids
{
    IDANY = lex::min_token_id + 10
};
//]
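// Added note: ids below lex::min_token_id are reserved for the library's own
// use, and token definitions added without an explicit id are assigned unique
// ids starting at lex::min_token_id. The offset of 10 keeps IDANY safely
// clear of those automatically assigned ids.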
//[wcp_token_definition
template <typename Lexer>
struct word_count_tokens : lex::lexer<Lexer>
{
    word_count_tokens()
    {
        // define patterns (lexer macros) to be used during token definition
        // below
        this->self.add_pattern
            ("WORD", "[^ \t\n]+")
        ;

        // define tokens and associate them with the lexer
        word = "{WORD}";    // reference the pattern 'WORD' as defined above

        // this lexer will recognize 3 token types: words, newlines, and
        // everything else
        this->self.add
            (word)          // no token id is needed here
            ('\n')          // characters are usable as tokens as well
            (".", IDANY)    // string literals will not be escaped by the library
        ;
    }

    // the token 'word' exposes the matched string as its parser attribute
    lex::token_def<std::string> word;
};
//]
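// A minimal sketch (not part of the original example) of driving these token
// definitions without any parser attached; lex::tokenize() returns whether
// the whole input could be tokenized, and the input string is made up for
// illustration:
//
//     word_count_tokens<lex::lexertl::lexer<> > lexer;
//     std::string input("two words\n");
//     char const* first = input.c_str();
//     bool ok = lex::tokenize(first, first + input.size(), lexer);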
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
//[wcp_grammar_definition
template <typename Iterator>
struct word_count_grammar : qi::grammar<Iterator>
{
    template <typename TokenDef>
    word_count_grammar(TokenDef const& tok)
      : word_count_grammar::base_type(start)
      , c(0), w(0), l(0)
    {
        using boost::phoenix::ref;
        using boost::phoenix::size;

        start =  *(   tok.word          [++ref(w), ref(c) += size(_1)]
                  |   lit('\n')         [++ref(c), ++ref(l)]
                  |   qi::token(IDANY)  [++ref(c)]
                  )
              ;
    }

    std::size_t c, w, l;
    qi::rule<Iterator> start;
};
//]
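// Added note: the grammar above consumes the token stream produced by the
// lexer, not raw characters. The token 'word' exposes the matched text as a
// std::string attribute, so inside its semantic action the placeholder _1
// refers to that string and size(_1) yields the word's length in characters.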
///////////////////////////////////////////////////////////////////////////////
//[wcp_main
int main(int argc, char* argv[])
{
/*< Define the token type to be used: `std::string` is available as the
    type of the token attribute
>*/ typedef lex::lexertl::token<
        char const*, boost::mpl::vector<std::string>
    > token_type;

/*< Define the lexer type to use implementing the state machine
>*/ typedef lex::lexertl::lexer<token_type> lexer_type;

/*< Define the iterator type exposed by the lexer type
>*/ typedef word_count_tokens<lexer_type>::iterator_type iterator_type;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    word_count_tokens<lexer_type> word_count;          // Our lexer
    word_count_grammar<iterator_type> g (word_count);  // Our parser

    // read the file into memory
    std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
    char const* first = str.c_str();
    char const* last = &first[str.size()];

/*< Parsing is done based on the token stream, not the character
    stream read from the input. The function `tokenize_and_parse()` wraps
    the given iterator range `[first, last)` with the lexical analyzer and
    uses its exposed iterators to parse the token stream.
>*/ bool r = lex::tokenize_and_parse(first, last, word_count, g);

    if (r) {
        std::cout << "lines: " << g.l << ", words: " << g.w
                  << ", characters: " << g.c << "\n";
    }
    else {
        std::string rest(first, last);
        std::cerr << "Parsing failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
//]
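// For instance, given an input file containing just the text "hello world\n",
// the program prints: lines: 1, words: 2, characters: 12.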