File: lexer

package info (click to toggle)
bisonc%2B%2B 6.09.02-1
links: PTS, VCS
area: main
in suites: forky, sid
size: 5,984 kB
sloc: cpp: 9,375; ansic: 1,505; fortran: 1,134; makefile: 1,062; sh: 526; yacc: 84; lex: 60
file content (352 lines) | stat: -rw-r--r-- 12,670 bytes
// Scanner specification in flexc++ format: directives, start conditions
// and named patterns used by the rules section below.
%filenames scanner

%class-name = "Scanner" 
%token-path = "../tokens/tokens.h"

%debug
// %print-tokens

// Exclusive start conditions (mini scanners) referred to by the rules.
%x xstring pstring pxstring string rawstring comment quote block typespec
%x typecomment

// Named patterns:
//  OCT3  - exactly three octal digits
//  HEX2  - exactly two hexadecimal digits
//  IDENT - a letter or underscore followed by letters, digits, underscores
//  NR    - one or more decimal digits
OCTAL   [0-7]
OCT3    {OCTAL}{3}
HEX     [[:xdigit:]]
HEX2    {HEX}{2}
ID1     [[:alpha:]_]
ID2     [[:alnum:]_]
IDENT   {ID1}{ID2}*
NR      [[:digit:]]+

%%

<INITIAL,block>{
    //  Rules shared by the INITIAL and block start conditions: opening a
    //  block, whitespace handling and comment handling.

"{"         {
                    // open a block, or count a nested block
                d_block.open(lineNr(), filename()); 
                begin(StartCondition_::block);
            }

    //  Whitespace is consumed by the next two rules. While a block is being
    //  collected (d_block converts to true) a run of blanks/tabs is appended
    //  to the block as one blank, and a run of newlines as one newline.
    //  NOTE(review): this comment used to refer to d_retWS, which is not
    //  used by any rule visible here - confirm it no longer exists.
[ \t]+       {
                 if (d_block)
                     d_block += " ";
             }
             
[\n]+        {
                    // setLineNrs() is defined elsewhere; presumably it
                    // updates the line-number bookkeeping - TODO confirm
                setLineNrs();
                if (d_block)
                    d_block += "\n";
             }

"//".*       // ignore eoln comment in source blocks

    //  If comment is entered from `block', either a blank or a newline is
    //  added to the block as soon as the matching end-comment is seen, and
    //  the scanner returns to its block mini scanner state.
"/*"         {
                    // start with a blank as the comment's replacement; the
                    // comment rules change it to a newline once a newline
                    // is seen inside the comment
                 d_commentChar[0] = ' ';
                 begin(StartCondition_::comment);
             }
}

    //  Blocks start at { and end at their matching } char. They may contain
    //  comments and whitespace, but whitespace is reduced to single blanks or
    //  newlines. All STRING and QUOTE constants are kept as-is, and are
    //  registered as skip-ranges for $-checks.
<block>{
        // Start of a C++ raw string literal. Only delimiters shaped like
        // an identifier (or no delimiter at all) are recognized here.
    R\"{IDENT}?\(   rawString();

        // BLOCK is returned only when d_block.close() reports true;
        // otherwise scanning continues in the block state (presumably a
        // nested block was closed - TODO confirm in Block::close).
    "}"         {
                    if (d_block.close())    // close a block
                    {
                        begin(StartCondition_::INITIAL);
                        return Tokens::BLOCK;
                    }
                }
    
        // String and character constants: switch to the matching mini
        // scanner; more() keeps the opening delimiter in the matched text.
    "\""        {
                    begin(StartCondition_::string);
                    more();
                }
    
    "'"         {
                    begin(StartCondition_::quote);
                    more();
                }


    // Negative dollar indices with a tag indicate a .get request.
    // There is no reference option, so this rule is disabled:
    // "_$<"{IDENT}">"-{NR}    |       // refDid_n

            // see: parser/substituteblock.cc
            // AtDollar Pattern:
            // $$ or $N (N may be negative), optionally followed by
            // whitespace, then '=': handled by assignment()
    "$$"[ \t\n]*=           |       // assignments are refDD or refD_
    "$"-?{NR}[ \t\n]*=      assignment();
 
            // all other $ and @ references are handed to d_block.atDollar,
            // together with the current line number
    "$$("                   |
    @@                      |
    @{NR}                   |
    "_$$"                   |       
    "$$"\.?                 |
    "$$->"                  |
    "_$"-?{NR}              |
    \$-?{NR}                |
    \$-?{NR}\.              |
    \$-?{NR}"->"            |
    "$<"{IDENT}">"-{NR}\.?  |                               
    "$<"{IDENT}">"-{NR}"->" d_block.atDollar(lineNr(), d_matched, false);

            // any other character is passed to d_block as matched
    .                       d_block(d_matched);
}

%baseclass-header[ \t]+         {
                                        // Directives that take a file name
                                        // or similar argument consume the
                                        // blanks following the directive
                                        // and continue in `pxstring', which
                                        // scans that argument.
                                    begin(StartCondition_::pxstring);
                                    return Tokens::BASECLASS_HEADER;
                                }
%baseclass-preinclude[ \t]+     {
                                    begin(StartCondition_::pxstring);
                                    return Tokens::BASECLASS_PREINCLUDE;
                                }
%class-header[ \t]+             {
                                    begin(StartCondition_::pxstring);
                                    return Tokens::CLASS_HEADER;
                                }
%class-name                     return Tokens::CLASS_NAME;
%constructor-checks             return Tokens::CONSTRUCTOR_CHECKS;
%default-actions                return Tokens::DEFAULT_ACTIONS;
%debug                          return Tokens::DEBUGFLAG;
%error-verbose                  return Tokens::ERROR_VERBOSE;
%expect                         return Tokens::EXPECT;
%filenames[ \t]+                {
                                    begin(StartCondition_::pxstring);
                                    return Tokens::FILENAMES;
                                }
"%flex"                         return Tokens::FLEX;
%implementation-header[ \t]+    {
                                    begin(StartCondition_::pxstring);
                                    return Tokens::IMPLEMENTATION_HEADER;
                                }
%include[ \t]+                  {
                                        // No token is returned for
                                        // %include: only d_include is set
                                        // and the argument is scanned via
                                        // pxstring. Presumably the string
                                        // handling code inspects d_include
                                        // - TODO confirm.
                                    begin(StartCondition_::pxstring);
                                    d_include = true;
                                }
%left                           return Tokens::LEFT;
%locationstruct                 return Tokens::LOCATIONSTRUCT;
%lsp-needed                     return Tokens::LSP_NEEDED;
%ltype[ \t]+                    {
                                        // the type name is scanned by
                                        // `xstring' (text up to white space
                                        // or comment)
                                    begin(StartCondition_::xstring);
                                    return Tokens::LTYPE;
                                }
%namespace                      return Tokens::NAMESPACE;
%negative-dollar-indices        return Tokens::NEG_DOLLAR;
%no-lines                       return Tokens::NOLINES;
%nonassoc                       return Tokens::NONASSOC;
%parsefun-source[ \t]+          {
                                    begin(StartCondition_::pxstring);
                                    return Tokens::PARSEFUN_SOURCE;
                                }
%polymorphic                    return Tokens::POLYMORPHIC;
%prec                           return Tokens::PREC;
%print-tokens                   return Tokens::PRINT_TOKENS;
%prompt                         return Tokens::PROMPT;
%required-tokens                return Tokens::REQUIRED;
%right                          return Tokens::RIGHT;
%scanner[ \t]+                  {
                                    begin(StartCondition_::pxstring);
                                    return Tokens::SCANNER;
                                }
%scanner-class-name[ \t]+       {
                                    begin(StartCondition_::pxstring);
                                    return Tokens::SCANNER_CLASS_NAME;
                                }
%scanner-token-function[ \t]+   {
                                    begin(StartCondition_::pxstring);
                                    return Tokens::SCANNER_TOKEN_FUNCTION;
                                }
%scanner-matched-text-function[ \t]+ {
                                    begin(StartCondition_::pxstring);
                                    return 
                                        Tokens::SCANNER_MATCHED_TEXT_FUNCTION;
                                }
    // %stack-expansion requires trailing blanks/tabs but, unlike the
    // directives above, does not switch to another start condition.
%stack-expansion[ \t]+          return Tokens::STACK_EXPANSION;
%start                          return Tokens::START;
%stype[ \t]+                    {
                                    begin(StartCondition_::xstring);
                                    return Tokens::STYPE;
                                }
%tag-mismatches                 return Tokens::WARN_TAGS;
%target-directory[ \t]+         {
                                    begin(StartCondition_::pxstring);
                                    return Tokens::TARGET_DIRECTORY;
                                }
%thread-safe                    return Tokens::THREAD_SAFE;
%token                          return Tokens::TOKEN;
%token-class                    return Tokens::TOKEN_CLASS;
%token-namespace                return Tokens::TOKEN_NAMESPACE;
%token-path[ \t]+               {
                                    begin(StartCondition_::pxstring);
                                    return Tokens::TOKEN_PATH;
                                }
%type                           return Tokens::TYPE;
%union                          return Tokens::UNION;
%weak-tags                      return Tokens::WEAK_TAGS;
"%%"                            return Tokens::TWO_PERCENTS;

"'"                             {
                                        // start of a quoted constant:
                                        // continue in `quote', keeping the
                                        // quote in the matched text
                                    begin(StartCondition_::quote);
                                    more();
                                }

"\""                            {
                                        // start of a string: continue in
                                        // `string', keeping the double
                                        // quote in the matched text
                                    begin(StartCondition_::string);
                                    more();
                                }

{IDENT}                         return Tokens::IDENTIFIER;

[[:digit:]]+                    {
                                        // the value is stored in d_number.
                                        // NOTE(review): if this is
                                        // std::stoul it throws
                                        // std::out_of_range for values not
                                        // fitting an unsigned long -
                                        // confirm that this is handled.
                                    d_number = stoul(d_matched);
                                    return Tokens::NUMBER;
                                }

    // any other character is returned as its own token value
.                               return d_matched[0];

    // pxstring is activated after a directive has been sensed.
    // It inspects the first character of the directive's argument and
    // selects the mini scanner handling it: `string' for a double-quoted
    // string, `pstring' for a <...> string, and `xstring' for any other
    // sequence of non-blank characters.
<pxstring>{
    "\""    {
                more();
                begin(StartCondition_::string);
            }
    "<"     {
                more();
                begin(StartCondition_::pstring);
            }
    .       {
                    // accept(0) pushes the character back, so xstring
                    // scans the argument from its first character
                accept(0);
                begin(StartCondition_::xstring);
            }
            // a newline before any argument: handled by eoln(), which is
            // defined elsewhere
    \n      return eoln();
}
    // string may be entered from INITIAL, block and pxstring.
    // Strings are all series of characters (including escaped chars,
    // like \") surrounded by double quotes:
<string>{
            // closing quote: STRING is returned only when handleXstring(0)
            // returns true (handleXstring is defined elsewhere)
    "\""    {
                if (handleXstring(0))
                    return Tokens::STRING;
            }
            // a backslash plus the next character is collected as a unit,
            // so an escaped double quote does not end the string
    "\\".   |              
    .       more();
            // a newline inside a string: handled by eoln()
    \n      return eoln();
}

    // a pstring is a string surrounded by < and >. It is entered from
    // pxstring once the opening < has been seen.
<pstring>{      
            // closing >: STRING is returned only when handleXstring(0)
            // returns true (handleXstring is defined elsewhere)
    ">"     {
                if (handleXstring(0))
                    return Tokens::STRING;
            }
            // a backslash plus the next character is collected as a unit,
            // so an escaped > does not end the pstring
    "\\".   |              
    .       more();
            // a newline inside a pstring: handled by eoln()
    \n      return eoln();
}

    //  xstring returns the next string delimited by either blanks, tabs,
    //  newlines or C/C++ comment. All other characters are collected using
    //  more().
    //  NOTE(review): the argument passed to handleXstring (1 for a white
    //  space character, 2 for a comment opener) presumably is the number of
    //  trailing delimiter characters that are not part of the string -
    //  TODO confirm in handleXstring.
<xstring>{
    [[:space:]]     {
                        if (handleXstring(1))
                            return Tokens::STRING;
                    }

    "//"            |
    "/*"            {
                        if (handleXstring(2))
                            return Tokens::STRING;
                    }

    .           more();
}

<rawstring>{
        // Contents of a C++ raw string literal.
        // A `)', an optional identifier and a double quote may end the raw
        // string: checkEndOfRawString (defined elsewhere) decides;
        // presumably it compares the identifier with the opening delimiter
        // - TODO confirm.
    \){IDENT}?\"    checkEndOfRawString();

        // all other characters, newlines included, are collected
    .|\n            more();
}

<comment>{
.                   // the text of the comment itself is discarded

\n                  {
                            // once a newline is seen inside the comment,
                            // the comment's replacement character becomes
                            // a newline
                        setLineNrs();
                        d_commentChar[0] = '\n';
                    }

"*/"                {
                            // end of comment: while a block is being
                            // collected the replacement character is added
                            // to it and scanning continues in `block';
                            // otherwise scanning continues in INITIAL
                        if (d_block)
                        {
                            d_block += d_commentChar;
                            begin(StartCondition_::block);
                        }
                        else
                            begin(StartCondition_::INITIAL);
                    }
}
    //  quote may be entered from INITIAL and block. 
    //  Quoted constants start with a quote, which was kept in the matched
    //  text by more(). The rules below match the remainder of the constant
    //  up to and including the closing quote: octal or hex numbers, escaped
    //  chars, or other quoted constants. Most are passed to returnQuoted
    //  (defined elsewhere) with the Scanner member handling the conversion.
<quote>{

        // backslash followed by exactly three octal digits
"\\"{OCT3}"'"        returnQuoted(&Scanner::octal);
                     
        // backslash, x, and exactly two hexadecimal digits
"\\x"{HEX2}"'"       returnQuoted(&Scanner::hexadecimal);
                     
        // standard single-letter escape sequences
"\\"[abfnrtv]"'"     {
                             // if d_block(d_matched) returns true scanning
                             // continues in `block'; otherwise QUOTE is
                             // returned from the INITIAL state
                         if (d_block(d_matched))
                             begin(StartCondition_::block);
                         else
                         {
                             begin(StartCondition_::INITIAL);
                             escape();       // quoted escape char
                             return Tokens::QUOTE;
                         }
                     }

        // backslash followed by any other character
"\\"."'"            returnQuoted(&Scanner::matched2);

        // a single plain character
."'"                returnQuoted(&Scanner::matched1);

        // anything else up to the closing quote
[^']+"'"            returnQuoted(&Scanner::multiCharQuote);

}

    // a typespec holds all chars after a ':' until a ';' or '%' (which are
    // pushed back). It is used as a type specification in
    // parser/inc/directives. The text is collected in d_typeName.
    // Backslash-escaped characters are added as matched, so an escaped
    // ';' or '%' does not end the typespec.
    // NOTE(review): no rule visible in this file begins `typespec';
    // presumably it is activated by code outside the rules - TODO confirm.
<typespec>{
    \n              d_typeName += ' ';  // convert newlines to spaces

    \\.             |                           // add escaped chars as-is
    [^;%]           d_typeName += matched();    // accept all until ; or %

    "//".*          // ignore EOLN comment

                                        // ignore std C comment
    "/*"            begin(StartCondition_::typecomment);
    
    [;%]            returnTypeSpec();   // back to INITIAL, returns IDENTIFIER
}

<typecomment>{
        // A standard C comment inside a typespec: its contents are skipped.
    .|\n            // ignored

    "*/"            {
                            // the comment is replaced by one blank in
                            // d_typeName and scanning continues in
                            // `typespec'
                        d_typeName += ' ';
                        begin(StartCondition_::typespec);
                    }
}