File: strip_comments_lexer.cpp

package info (click to toggle)
boost1.74 1.74.0-9
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 464,084 kB
  • sloc: cpp: 3,338,324; xml: 131,293; python: 33,088; ansic: 14,336; asm: 4,034; sh: 3,351; makefile: 1,193; perl: 1,036; yacc: 478; php: 212; ruby: 102; lisp: 24; sql: 13; csh: 6
file content (172 lines) | stat: -rw-r--r-- 5,561 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
//  Copyright (c) 2001-2010 Hartmut Kaiser
// 
//  Distributed under the Boost Software License, Version 1.0. (See accompanying 
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example is equivalent to the following lex program:
//
//       %{
//       /* INITIAL is the default start state.  COMMENT is our new  */
//       /* state where we remove comments.                          */
//       %}
// 
//       %s COMMENT
//       %%
//       <INITIAL>"//".*    ;
//       <INITIAL>"/*"      BEGIN COMMENT; 
//       <INITIAL>.         ECHO;
//       <INITIAL>[\n]      ECHO;
//       <COMMENT>"*/"      BEGIN INITIAL;
//       <COMMENT>.         ;
//       <COMMENT>[\n]      ;
//       %%
// 
//       main() 
//       {
//         yylex();
//       }
//
//  Its purpose is to strip comments out of C code.
//
//  Additionally this example demonstrates the use of lexer states to structure
//  the lexer definition.

// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_core.hpp>

#include <iostream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;  

///////////////////////////////////////////////////////////////////////////////
//  Token definition: We use the lexertl based lexer engine as the underlying 
//                    lexer type.
///////////////////////////////////////////////////////////////////////////////
// Token ids for the "any character" and "end of line" tokens, expressed as
// offsets from lex::min_token_id. They are not referenced by any code in the
// visible part of this file.
enum tokenids 
{
    IDANY = lex::min_token_id + 10,
    IDEOL                           // implicitly IDANY + 1, i.e. min_token_id + 11
};

///////////////////////////////////////////////////////////////////////////////
// Simple custom semantic action function object used to print the matched
// input sequence for a particular token.
//
// Template parameters:
//   Char, Traits  - character type and traits of the output stream the
//                   matched text is written to.
//
// The functor only stores a reference to the stream; the stream must stay
// alive for as long as the functor (or any copy of it) may be invoked.
template <typename Char, typename Traits>
struct echo_input_functor
{
    // os_: the stream that receives the matched input sequences
    echo_input_functor (std::basic_ostream<Char, Traits>& os_)
      : os(os_) {}

    // This is called by the semantic action handling code during the lexing.
    //   b, e  - iterators delimiting the matched input sequence [b, e)
    // The remaining (unnamed) arguments are part of the semantic action 
    // signature and are not used here.
    template <typename Iterator, typename Context>
    void operator()(Iterator const& b, Iterator const& e
      , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&
      , std::size_t&, Context&) const
    {
        // Build the string from the stream's own character type and traits,
        // so that the functor also works for wide streams or streams with
        // custom traits (for std::ostream this is exactly std::string).
        os << std::basic_string<Char, Traits>(b, e);
    }

    std::basic_ostream<Char, Traits>& os;
};

// Convenience factory: deduces Char and Traits from the given stream and 
// returns the matching echo_input_functor bound to that stream.
template <typename Char, typename Traits>
inline echo_input_functor<Char, Traits> 
echo_input(std::basic_ostream<Char, Traits>& os)
{
    typedef echo_input_functor<Char, Traits> functor_type;
    return functor_type(os);
}

///////////////////////////////////////////////////////////////////////////////
// Semantic action function object which, when invoked, asks the lexer context
// to switch to the lexer state whose name was given at construction time.
struct set_lexer_state
{
    // name of the lexer state to switch to (stored as a copy)
    std::string state;

    set_lexer_state(char const* name)
      : state(name) {}

    // Invoked by the semantic action handling code while lexing. Only the 
    // context argument is used; the matched range, the pass flag and the 
    // token id are ignored.
    template <typename Iter, typename Ctx>
    void operator()(Iter const&, Iter const&
      , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&
      , std::size_t&, Ctx& ctx) const
    {
        char const* const target = state.c_str();
        ctx.set_state_name(target);
    }
};

///////////////////////////////////////////////////////////////////////////////
// Lexer definition for the comment stripper.
//
// Two lexer states are used:
//   - the default state, in which text outside of comments is echoed to 
//     std::cout, and
//   - the "COMMENT" state, entered on "/*" and left again on "*/", in which
//     no token has an echoing action attached.
//
// Template parameter:
//   Lexer  - the concrete lexer type this definition derives from (via 
//            lex::lexer<Lexer>).
template <typename Lexer>
struct strip_comments_tokens : lex::lexer<Lexer>
{
    strip_comments_tokens()
      : strip_comments_tokens::base_type(lex::match_flags::match_default)
    {
        // define tokens and associate them with the lexer
        // NOTE: the escaped double quotes are part of the pattern text, 
        // mirroring the quoted literals of the lex program shown at the top
        // of this file.
        cppcomment = "\"//\"[^\n]*";    // '//[^\n]*'
        ccomment = "\"/*\"";            // '/*'
        endcomment = "\"*/\"";          // '*/'
        any = std::string(".");
        eol = "\n";

        // The following tokens are associated with the default lexer state 
        // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is 
        // strictly optional.
        //
        //   cppcomment - no action attached, so nothing is echoed for it
        //   ccomment   - switches the lexer to the "COMMENT" state
        //   eol, any   - echoed to std::cout
        //
        // NOTE(review): the order of the alternatives presumably determines
        // which token wins when several patterns match the same input (eol is
        // listed before any) - confirm against the Spirit.Lex documentation
        // before reordering.
        this->self 
            =   cppcomment
            |   ccomment    [ set_lexer_state("COMMENT") ]
            |   eol         [ echo_input(std::cout) ]
            |   any         [ echo_input(std::cout) ]
            ;

        // The following tokens are associated with the lexer state 'COMMENT'.
        // Only endcomment carries an action (switching back to "INITIAL"); 
        // the newline and any-character patterns have no action attached.
        this->self("COMMENT") 
            =   endcomment  [ set_lexer_state("INITIAL") ]
            |   "\n"
            |   std::string(".") 
            ;
    }

    // token definitions referenced by the lexer definitions above
    lex::token_def<> cppcomment, ccomment, endcomment, any, eol;
};

///////////////////////////////////////////////////////////////////////////////
// Program entry point.
//
// Reads the input (file name taken from argv[1], or "strip_comments.input" 
// when no argument is given), runs the comment stripping lexer over it and 
// lets the lexer's semantic actions write the result to std::cout.
//
// Returns 0 on success and 1 if lexical analysis failed, so that callers 
// (shell scripts, test drivers) can detect the failure.
int main(int argc, char* argv[])
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // lexer type
    typedef 
        lex::lexertl::actor_lexer<lex::lexertl::token<base_iterator_type> > 
    lexer_type;

    // now we use the types defined above to create the lexer object 
    // instance needed to invoke the tokenization process
    strip_comments_tokens<lexer_type> strip_comments;             // Our lexer

    // No parsing is done at all; everything happens in the lexer semantic
    // actions.
    // NOTE(review): read_from_file comes from example.hpp and presumably 
    // returns the file content as a string - confirm how it reports errors.
    std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
    base_iterator_type first = str.begin();
    bool r = lex::tokenize(first, str.end(), strip_comments);

    if (!r) {
        // 'first' is used as the start of the input that was not consumed
        std::string rest(first, str.end());
        std::cerr << "Lexical analysis failed\n" << "stopped at: \"" 
                  << rest << "\"\n";
        return 1;   // report the failure through the exit status
    }
    return 0;
}