1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
|
// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example is the equivalent to the following lex program:
//
// %{
// /* INITIAL is the default start state. COMMENT is our new */
// /* state where we remove comments. */
// %}
//
// %s COMMENT
// %%
// <INITIAL>"//".* ;
// <INITIAL>"/*" BEGIN COMMENT;
// <INITIAL>. ECHO;
// <INITIAL>[\n] ECHO;
// <COMMENT>"*/" BEGIN INITIAL;
// <COMMENT>. ;
// <COMMENT>[\n] ;
// %%
//
// main()
// {
// yylex();
// }
//
// Its purpose is to strip comments out of C code.
//
// Additionally this example demonstrates the use of lexer states to structure
// the lexer definition.
// #define BOOST_SPIRIT_LEXERTL_DEBUG
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <string>
#include "example.hpp"
using namespace boost::spirit;
///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
// lexer type.
///////////////////////////////////////////////////////////////////////////////
enum tokenids
{
IDANY = lex::min_token_id + 10
};
template <typename Lexer>
struct strip_comments_tokens : lex::lexer<Lexer>
{
strip_comments_tokens()
: strip_comments_tokens::base_type(lex::match_flags::match_default)
{
// define tokens and associate them with the lexer
cppcomment = "\"//\"[^\n]*"; // '//[^\n]*'
ccomment = "\"/*\""; // '/*'
endcomment = "\"*/\""; // '*/'
// The following tokens are associated with the default lexer state
// (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
// strictly optional.
this->self.add
(cppcomment) // no explicit token id is associated
(ccomment)
(".", IDANY) // IDANY is the token id associated with this token
// definition
;
// The following tokens are associated with the lexer state "COMMENT".
// We switch lexer states from inside the parsing process using the
// in_state("COMMENT")[] parser component as shown below.
this->self("COMMENT").add
(endcomment)
(".", IDANY)
;
}
lex::token_def<> cppcomment, ccomment, endcomment;
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct strip_comments_grammar : qi::grammar<Iterator>
{
template <typename TokenDef>
strip_comments_grammar(TokenDef const& tok)
: strip_comments_grammar::base_type(start)
{
// The in_state("COMMENT")[...] parser component switches the lexer
// state to be 'COMMENT' during the matching of the embedded parser.
start = *( tok.ccomment
>> qi::in_state("COMMENT")
[
// the lexer is in the 'COMMENT' state during
// matching of the following parser components
*token(IDANY) >> tok.endcomment
]
| tok.cppcomment
| qi::token(IDANY) [ std::cout << _1 ]
)
;
}
qi::rule<Iterator> start;
};
///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
// iterator type used to expose the underlying input stream
typedef std::string::iterator base_iterator_type;
// lexer type
typedef
lex::lexertl::lexer<lex::lexertl::token<base_iterator_type> >
lexer_type;
// iterator type exposed by the lexer
typedef strip_comments_tokens<lexer_type>::iterator_type iterator_type;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
strip_comments_tokens<lexer_type> strip_comments; // Our lexer
strip_comments_grammar<iterator_type> g (strip_comments); // Our parser
// Parsing is done based on the token stream, not the character
// stream read from the input.
std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
base_iterator_type first = str.begin();
bool r = lex::tokenize_and_parse(first, str.end(), strip_comments, g);
if (r) {
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
}
else {
std::string rest(first, str.end());
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "stopped at: \"" << rest << "\"\n";
std::cout << "-------------------------\n";
}
std::cout << "Bye... :-) \n\n";
return 0;
}
|