1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189
|
/*
-*- mode: c++; c-basic-offset:4 -*-
This file is part of libdap, A C++ implementation of the OPeNDAP Data
Access Protocol.
Copyright (c) 2014 OPeNDAP, Inc.
Author: James Gallagher <jgallagher@opendap.org>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
*/
%{ /* -*- C++ -*- */
//#include "config.h"
#include <string>
#include "Error.h"
#include "D4FunctionScanner.h"
/* typedef to make the returns for the tokens shorter */
/* NB: It would be best to use the same scanner (and maybe parser) for
both the D4 CE and Function parameters, but for the initial version
far less complexity is require by the Function expression scanner
(since the initial version will just support variables, constants,
functions and the $<type> array special form) and not function arguments
that are general expressions (like array slicing and/or filters).
This comment is here because this is the first place where there is
coupling between the CE parser and its scanner. I'm not sure, however,
if one string can be parsed by two parsers if they are using two scanners,
so extending the Function parser to allow function args to be any CE
clause may mean some more serious work with the parsers.
jhrg 3/10/14 */
typedef libdap::D4FunctionParser::token token;
/* This was added because of some notes on the net about compiler version
issues. I don't know if it's needed when using the C++ mode of flex. */
#undef yywrap
#define yywrap() 1
/* define yyterminate as this instead of NULL */
#define yyterminate() return(token::END)
#define YY_FATAL_ERROR(msg) {\
throw(libdap::Error(malformed_expr, std::string("Error scanning function expression text: ") + std::string(msg))); \
}
%}
%option c++
%option yyclass="D4FunctionScanner"
/* Use this if several scanners are needed. This will cause flex to
#define yyFlexLexer to be <prefix>FlexLexer (the yyFlexLexer is defined
in lex.<prefix>.cc. jhrg 8/8/13 */
%option prefix="d4_function"
/* These two options turn on line counting - useful for error messages -
and debugging, respectively. When debugging is on, it's possible to see
which scanner rules are used at which points in the input. */
%option yylineno
%option debug
/* Do not output the default rule (where any unmatched input is echoed to
stdout). When set, nodefault will cause the scanner to exit on an error. */
%option nodefault
/* noyywrap makes the scanner assume that EOF/EOS is the end of the input.
If this is not set, the scanner will assume there are more files to
scan. */
%option noyywrap
%option nounput
/* When set, warn prints a message when the default rule can be matched
but nodefault is given (among other warnings). */
%option warn
%option batch
%x quote
/* This pattern is slightly different from the one used by the CE scanner
because it allows a WORD to start with a '#' so that the #<type>
array constant syntax can be used in functions. Otherwise, a WORD must
be able to contain this hideous mix of characters because a variable
can. jhrg 3/10/14 */
WORD [-+a-zA-Z0-9_%*\\~@!#][-+a-zA-Z0-9_%*\\~@!#]*
/* I added these tokens because floating point values may contain dots and
added a '.' to WORD will break the parsing of paths (or make for some
fairly obscure code - where $Float32() takes tokens that match 'path'.
Since we have a separate scanner for the function expressions, might as
well add a FLOAT token... jhg 3/17/14
FLOAT [-+eE.0-9][-+eE.0-9]*
*/
%{
// Code run each time a pattern is matched
#define YY_USER_ACTION loc->columns(yyleng);
%}
%%
%{
// Code run each time yylex is called
loc->step();
%}
"," return token::COMMA;
";" return token::SEMICOLON;
":" return token::COLON;
"(" return token::LPAREN;
")" return token::RPAREN;
"/" return token::GROUP_SEP;
"." return token::PATH_SEP;
"$Byte" return token::DOLLAR_BYTE;
"$UInt8" return token::DOLLAR_UINT8;
"$Int8" return token::DOLLAR_INT8;
"$UInt16" return token::DOLLAR_UINT16;
"$Int16" return token::DOLLAR_INT16;
"$UInt32" return token::DOLLAR_UINT32;
"$Int32" return token::DOLLAR_INT32;
"$UInt64" return token::DOLLAR_UINT64;
"$Int64" return token::DOLLAR_INT64;
"$Float32" return token::DOLLAR_FLOAT32;
"$Float64" return token::DOLLAR_FLOAT64;
[ \t]+ /* ignore these */
[\r\n]+ /* ignore these */
{WORD} { yylval->build<std::string>(yytext); return token::WORD; }
<INITIAL><<EOF>> return token::END;
["] { BEGIN(quote); yymore(); }
<quote>[^"\\]* yymore(); /* Anything that's not a double quote or a backslash */
<quote>[\\]["] yymore(); /* This matches the escaped double quote (\") */
<quote>[\\]{2} yymore(); /* This matches an escaped escape (\\) */
<quote>[\\]{1} {
BEGIN(INITIAL);
if (yytext) {
YY_FATAL_ERROR("Inside a string, backslash (\\) can escape a double quote or must itself be escaped (\\\\).");
}
}
<quote>["] {
/* An unescaped double quote in the 'quote' state indicates the end of the string */
BEGIN(INITIAL);
yylval->build<std::string>(yytext);
return token::STRING;
}
<quote><<EOF>> {
BEGIN(INITIAL); /* resetting the state is needed for reentrant parsers */
YY_FATAL_ERROR("Unterminated quote");
}
. {
BEGIN(INITIAL);
if (yytext) {
YY_FATAL_ERROR("Characters found in the input were not recognized.");
}
}
%%
|