File: D4ConstraintEvaluator.cc

package info (click to toggle)
libdap 3.20.11-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 24,568 kB
  • sloc: cpp: 50,809; sh: 41,536; xml: 23,511; ansic: 20,030; yacc: 2,508; exp: 1,544; makefile: 990; lex: 309; perl: 52; fortran: 8
file content (482 lines) | stat: -rw-r--r-- 18,182 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
// -*- mode: c++; c-basic-offset:4 -*-

// This file is part of libdap, A C++ implementation of the OPeNDAP Data
// Access Protocol.

// Copyright (c) 2002,2003 OPeNDAP, Inc.
// Author: James Gallagher <jgallagher@opendap.org>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
//
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.

#include <string>
#include <sstream>
#include <iterator>

//#define DODS_DEBUG 

#include "D4CEScanner.h"
#include "D4ConstraintEvaluator.h"
#include "d4_ce_parser.tab.hh"

#include "DMR.h"
#include "D4Group.h"
#include "D4Dimensions.h"
#include "D4Maps.h"
#include "BaseType.h"
#include "Array.h"
#include "Constructor.h"
#include "D4Sequence.h"

#include "D4RValue.h"
#include "D4FilterClause.h"

#include "escaping.h"
#include "parser.h"		// for get_ull()
#include "debug.h"

// Always define this for a production release.
#define PREVENT_XXS_VIA_CE 1
#if NDEBUG && !PREVENT_XXS_VIA_CE
#error("Never release libdap with PREVENT_XXS_VIA_CE turned off")
#endif

namespace libdap {

bool D4ConstraintEvaluator::parse(const std::string &expr)
{
    
    d_expr = expr;	// set for error messages. See the %initial-action section of .yy

    DBG(cerr << "Entering D4ConstraintEvaluator::parse: "  << endl);
    std::istringstream iss(expr);
    D4CEScanner scanner(iss);
    D4CEParser parser(scanner, *this /* driver */);

    if (trace_parsing()) {
        parser.set_debug_level(1);
        parser.set_debug_stream(std::cerr);
    }

    if(expr.empty()) 
        d_dmr->set_ce_empty(true);

    return parser.parse() == 0;
}

/**
 * @brief Throw an Error reporting that a variable named in the CE was not found.
 *
 * If PREVENT_XXS_VIA_CE is true (it should be), neither the identifier nor the
 * constraint expression text is included in the message; a fixed string is used
 * instead so that text supplied in the CE cannot leak into the response.
 * Otherwise the message contains d_expr, 'id' and 'ident'.
 *
 * @param id The identifier that could not be found.
 * @param ident A literal that identifies the parse rule, to help locate the
 * source of the error.
 * @exception Error Always thrown, with the code no_such_variable.
 */
void D4ConstraintEvaluator::throw_not_found(const string &id, const string &ident)
{
#if PREVENT_XXS_VIA_CE
    // The parameters are named so that the diagnostic branch below compiles
    // when the macro is turned off; here they are intentionally unused.
    (void) id;
    (void) ident;
    throw Error(no_such_variable, string("The constraint expression referenced a variable that was not found in the dataset."));
#else
    throw Error(no_such_variable, d_expr + ": The variable " + id + " was not found in the dataset (" + ident + ").");
#endif
}

/**
 * @brief Throw an Error reporting that a variable used as an Array is not one.
 *
 * If PREVENT_XXS_VIA_CE is true (it should be), a fixed message is used and
 * neither the identifier nor the constraint expression text is included.
 * Otherwise the message contains d_expr, 'id' and 'ident'.
 *
 * @param id The identifier that was used as an Array.
 * @param ident A literal that identifies the parse rule, to help locate the
 * source of the error.
 * @exception Error Always thrown, with the code no_such_variable.
 */
void D4ConstraintEvaluator::throw_not_array(const string &id, const string &ident)
{
#if PREVENT_XXS_VIA_CE
    // The parameters are named so that the diagnostic branch below compiles
    // when the macro is turned off; here they are intentionally unused.
    (void) id;
    (void) ident;
    throw Error(no_such_variable, string("The constraint expression referenced an Array that was not found in the dataset."));
#else
    throw Error(no_such_variable, d_expr + ": The variable '" + id + "' is not an Array variable (" + ident + ").");
#endif
}

/**
 * @brief Walk a constructor's members and mark every Array found.
 *
 * Each member that is an Array is passed to mark_array_variable(); each member
 * that is a Structure or Sequence is searched recursively. Members of any other
 * type are skipped.
 *
 * @param btp A constructor-type variable (asserted).
 */
void D4ConstraintEvaluator::search_for_and_mark_arrays(BaseType *btp)
{
    DBG(cerr << "Entering D4ConstraintEvaluator::search_for_and_mark_arrays...(" << btp->name() << ")" << endl);

    assert(btp->is_constructor_type());

    Constructor *container = static_cast<Constructor*>(btp);

    Constructor::Vars_iter member = container->var_begin();
    const Constructor::Vars_iter last = container->var_end();
    while (member != last) {
        BaseType *child = *member;

        if (child->type() == dods_array_c) {
            DBG(cerr << "Found an array: " << child->name() << endl);
            mark_array_variable(child);
        }
        else if (child->type() == dods_structure_c || child->type() == dods_sequence_c) {
            DBG(cerr << "Found a ctor: " << child->name() << endl);
            search_for_and_mark_arrays(child);
        }
        // every other type: nothing to do

        ++member;
    }
}

/**
 * When an identifier is used in a CE, it becomes part of the 'current projection,'
 * which means it is part of the set of variables to be sent back to the client. This
 * method sets a flag in the variable (send_p; pronounced 'send predicate') indicating that.
 *
 * The same flag is also set, using BaseType's implementation of set_send_p(), on
 * every parent of the variable up to the top of the parent chain.
 *
 * @note If the variable is an Array, mark_array_variable() is called to set its
 * slices. Arrays held inside a constructor (or inside the constructor that an
 * Array's template variable is) are marked as well.
 * @param btp BaseType pointer to the variable. Must be non-null
 * @return The BaseType* to the variable; the send_p flag is set as a side effect.
 */
BaseType *
D4ConstraintEvaluator::mark_variable(BaseType *btp)
{
    assert(btp);

    DBG(cerr << "In D4ConstraintEvaluator::mark_variable... (" << btp->name() << "; " << btp->type_name() << ")" << endl);

    btp->set_send_p(true);

    const bool is_array = (btp->type() == dods_array_c);
    if (is_array) {
        mark_array_variable(btp);
    }

    // Constructors, and Arrays whose template variable is a constructor, may
    // contain Arrays that must be marked too.
    if (btp->is_constructor_type()) {
        search_for_and_mark_arrays(btp);
    }
    else if (is_array) {
        BaseType *element = btp->var();
        if (element && element->is_constructor_type()) {
            search_for_and_mark_arrays(element);
        }
    }

    // Mark each ancestor; use BaseType's impl so only the ancestor's own flag is set.
    for (BaseType *ancestor = btp->get_parent(); ancestor; ancestor = ancestor->get_parent()) {
        ancestor->BaseType::set_send_p(true);
    }

    return btp;
}

/**
 * @brief Does any dimension of 'map' have the same name as 'source_dim'?
 *
 * @param map The Array whose dimensions are scanned.
 * @param source_dim The dimension whose name is looked for.
 * @return True if a dimension of 'map' has a name equal to source_dim->name().
 */
static bool array_uses_shared_dimension(Array *map, D4Dimension *source_dim)
{
    Array::Dim_iter cursor = map->dim_begin();
    const Array::Dim_iter stop = map->dim_end();

    bool found = false;
    while (!found && cursor != stop) {
        found = (source_dim->name() == cursor->name);
        ++cursor;
    }

    return found;
}

/**
 * Add an array to the current projection with slicing. Calling this method will result
 * in the array being returned with anonymous dimensions.
 *
 * The slices applied are the ones currently held in d_indexes. When d_indexes is empty,
 * every dimension of the array that has an associated D4Dimension is constrained using
 * that D4Dimension. d_indexes is cleared before the method returns.
 *
 * @note If btp is an array that has shared dimensions and uses '[]' where a shared dimension
 * is found and if that shared dimension has been sliced, then the slice is used as the array's
 * slice for that dimension.
 *
 * @param btp The variable to mark. Must be an Array (asserted).
 * @return The BaseType* to the Array variable; the slicing information is set as a
 * side effect.
 * @exception Error (malformed_expr) if the number of indexes does not match the number of
 * dimensions, if a stride or stop value is too large for its dimension, or if one of the
 * array's Maps has no array.
 */

// Note: If a Map is not part of the current projection, do not include mention of it
// in the response DMR (CDMR)
BaseType *
D4ConstraintEvaluator::mark_array_variable(BaseType *btp)
{
    assert(btp->type() == dods_array_c);

    Array *a = static_cast<Array*>(btp);

    // If an array appears in a CE without the slicing operators ([]) we still have to
    // call add_constraint(...) for all of its shared dimensions for them to appear in
    // the Constrained DMR.
    if (d_indexes.empty()) {
        for (Array::Dim_iter d = a->dim_begin(), de = a->dim_end(); d != de; ++d) {
            D4Dimension *dim = a->dimension_D4dim(d);
            // Dimensions without an associated D4Dimension are left as they are.
            if (dim) {
                a->add_constraint(d, dim);
            }
        }
    }
    else {
        // Test that the indexes and dimensions match in number
        if (d_indexes.size() != a->dimensions())
            throw Error(malformed_expr, "The index constraint for '" + btp->name() + "' does not match its rank.");

        // 'd' walks the array's dimensions in step with 'i' walking d_indexes; it is
        // advanced at the bottom of the loop body.
        Array::Dim_iter d = a->dim_begin();
        for (vector<index>::iterator i = d_indexes.begin(), e = d_indexes.end(); i != e; ++i) {
            // (stop - start) + 1 is used as the element count of this dimension.
            if ((*i).stride > (unsigned long long) (a->dimension_stop(d, false) - a->dimension_start(d, false)) + 1)
                throw Error(malformed_expr,
                    "For '" + btp->name()
                        + "', the index stride value is greater than the number of elements in the Array");
            // The stop value is only tested when the index does not mean 'to the end' (rest).
            if (!(*i).rest
                && ((*i).stop) > (unsigned long long) (a->dimension_stop(d, false) - a->dimension_start(d, false)) + 1)
                throw Error(malformed_expr,
                    "For '" + btp->name()
                        + "', the index stop value is greater than the number of elements in the Array");

            D4Dimension *dim = a->dimension_D4dim(d);

            // In a DAP4 CE, specifying '[]' as an array dimension slice has two meanings.
            // It can mean 'all the elements' of the dimension or 'apply the slicing inherited
            // from the shared dimension'. The latter might provide 'all the elements'
            // but regardless, the Array object must record the CE correctly.

            if (dim && (*i).empty) {
                // This case corresponds to a CE that uses the '[]' notation for a
                // particular dimension - meaning, use the Shared Dimension size for
                // this dimension's 'slice'.
                a->add_constraint(d, dim);  // calls set_used_by_projected_var(true) + more
            }
            else {
                // This case corresponds to a 'local dimension slice' (See sections 8.6.2 and
                // 8.7 of the spec as of 4/12/16). When a local dimension slice is used, drop
                // the Map(s) that include that dimension. This enables people to constrain
                // an Array when some of the Array's dimensions don't use Shared Dimensions
                // but others do.

                // First apply the constraint to the Array's dimension. A 'rest' index
                // passes -1 as the stop value.
                a->add_constraint(d, (*i).start, (*i).stride, (*i).rest ? -1 : (*i).stop);

                // Then, if the Array has Maps, scan those Maps for any that use dimensions
                // that match the name of this particular dimension. If any such Maps are found
                // remove them. This ensures that the Array can be constrained using the 'local
                // dimension slice' without the constrained DMR containing references to Maps
                // that don't exist (or are otherwise nonsensical).
                //
                // This code came about as a fix for problems discovered during testing of
                // local dimension slices. See https://opendap.atlassian.net/browse/HYRAX-98
                // jhrg 4/12/16
                //
                // NOTE(review): because of the 'break' below, at most one Map is removed
                // per dimension on each pass through this code - confirm that an Array
                // cannot have more than one Map that uses the same dimension.
                if (!a->maps()->empty()) {
                    for (D4Maps::D4MapsIter m = a->maps()->map_begin(), e = a->maps()->map_end(); m != e; ++m) {
                        if ((*m)->array() == 0)
                            throw Error(malformed_expr,
                                "An array with Maps was found, but one of the Maps was not defined correctly.");

                        Array *map = const_cast<Array*>((*m)->array()); // Array lacks const iterator support
                        // Added a test to ensure 'dim' is not null. This could be the case if
                        // execution gets here and the index *i was not empty. jhrg 4/18/17
                        if (dim && array_uses_shared_dimension(map, dim)) {
                            D4Map *map_to_be_removed = *m;
                            a->maps()->remove_map(map_to_be_removed); // Invalidates the iterator
                            delete map_to_be_removed;   // removed from container; delete
                            break; // must leave the for loop because 'm' is now invalid
                        }
                    }
                }
            }

            ++d;
        }
    }

    d_indexes.clear();  // Clear the info so the next slice expression can be parsed.

    return btp;
}

/**
 * @brief Apply a slice to a dimension named in the constraint expression.
 *
 * The dimension is looked up by name starting at the DMR's root group. The
 * start, stride and stop values of the index are then recorded on it using
 * D4Dimension::set_constraint(). When the index runs 'to the end' (i.rest),
 * the stop value used is the dimension's size minus one.
 *
 * @param id The name of the dimension.
 * @param i The slice (start, stride, stop) to apply.
 * @return A pointer to the D4Dimension; the slice is set as a side effect.
 * @exception Error (malformed_expr) if no dimension named 'id' is found, or if
 * the stride or stop value is too large for the dimension.
 */
D4Dimension *
D4ConstraintEvaluator::slice_dimension(const std::string &id, const index &i)
{
    D4Dimension *dim = dmr()->root()->find_dim(id);

    // The name comes straight from the (untrusted) CE; without this test a
    // name that does not match any dimension leads to a null dereference below.
    // The name is not echoed in the message so CE text cannot leak into it.
    if (!dim)
        throw Error(malformed_expr,
            "The constraint expression referenced a dimension that was not found in the dataset.");

    if (i.stride > dim->size())
        throw Error(malformed_expr,
            "For '" + id + "', the index stride value is greater than the size of the dimension");

    // Written as 'stop >= size' and not 'stop > size - 1' so that a dimension
    // of size zero cannot wrap around and accept any stop value.
    if (!i.rest && (i.stop >= dim->size()))
        throw Error(malformed_expr, "For '" + id + "', the index stop value is greater than the size of the dimension");

    dim->set_constraint(i.start, i.stride, i.rest ? dim->size() - 1 : i.stop);

    return dim;
}

/**
 * @brief Build an index that selects a single element.
 *
 * The parsed value is used for both the first and third index fields, with
 * the value 1 between them.
 *
 * @param i Text of the index value; parsed with get_uint64().
 * @return The new index object.
 */
D4ConstraintEvaluator::index D4ConstraintEvaluator::make_index(const std::string &i)
{
    const unsigned long long position = get_uint64(i.c_str());

    return index(position, 1, position, false, false /*empty*/, "");
}

/**
 * @brief Build an index from start, stride and stop values given as text.
 *
 * @param i Text of the start value; parsed with get_uint64().
 * @param s Text of the stride value; parsed with get_uint64().
 * @param e Text of the stop value; parsed with get_uint64().
 * @return The new index object.
 * @exception Error (malformed_expr) if the start value is greater than the
 * stop value.
 */
D4ConstraintEvaluator::index D4ConstraintEvaluator::make_index(const std::string &i, const std::string &s,
    const std::string &e)
{
    // Keep the parsed values unsigned. Holding them in a signed 64-bit integer
    // turns values above INT64_MAX negative, which makes the start/stop test
    // below give the wrong answer for such values.
    const unsigned long long initial = get_uint64(i.c_str());
    const unsigned long long end = get_uint64(e.c_str());
    if (initial > end)
        throw Error(malformed_expr, string("The start value of an array index is past the stop value."));

    return index(initial, get_uint64(s.c_str()), end, false, false /*empty*/, "");
}

/**
 * @brief Build an index from start and stop values given as text and a
 * numeric stride.
 *
 * @param i Text of the start value; parsed with get_uint64().
 * @param s The stride value.
 * @param e Text of the stop value; parsed with get_uint64().
 * @return The new index object.
 * @exception Error (malformed_expr) if the start value is greater than the
 * stop value.
 */
D4ConstraintEvaluator::index D4ConstraintEvaluator::make_index(const std::string &i, unsigned long long s,
    const std::string &e)
{
    // Keep the parsed values unsigned. Holding them in a signed 64-bit integer
    // turns values above INT64_MAX negative, which makes the start/stop test
    // below give the wrong answer for such values.
    const unsigned long long initial = get_uint64(i.c_str());
    const unsigned long long end = get_uint64(e.c_str());
    if (initial > end)
        throw Error(malformed_expr, string("The start value of an array index is past the stop value."));

    return index(initial, s, end, false, false /*empty*/, "");
}

/**
 * @brief Build an index from start and stride values given as text, with no
 * stop value.
 *
 * The stop field is passed as zero and the flag that follows it is set to true.
 *
 * @param i Text of the start value; parsed with get_uint64().
 * @param s Text of the stride value; parsed with get_uint64().
 * @return The new index object.
 */
D4ConstraintEvaluator::index D4ConstraintEvaluator::make_index(const std::string &i, const std::string &s)
{
    const unsigned long long first = get_uint64(i.c_str());
    const unsigned long long step = get_uint64(s.c_str());

    return index(first, step, 0, true, false /*empty*/, "");
}

/**
 * @brief Build an index from a start value given as text and a numeric
 * stride, with no stop value.
 *
 * The stop field is passed as zero and the flag that follows it is set to true.
 *
 * @param i Text of the start value; parsed with get_uint64().
 * @param s The stride value.
 * @return The new index object.
 */
D4ConstraintEvaluator::index D4ConstraintEvaluator::make_index(const std::string &i, unsigned long long s)
{
    const unsigned long long first = get_uint64(i.c_str());

    return index(first, s, 0, true, false /*empty*/, "");
}

/**
 * @brief Format a filter expression for use at the end of an error message.
 *
 * @param op The infix operator.
 * @param arg1 The left-hand argument.
 * @param arg2 The right-hand argument.
 * @return The text "(arg1 op arg2)." - parenthesized, with a trailing period.
 */
static string expr_msg(const std::string &op, const std::string &arg1, const std::string &arg2)
{
    string text("(");
    text.append(arg1).append(" ").append(op).append(" ").append(arg2).append(").");
    return text;
}

/**
 * @brief Return the D4FilterClause constant for an operator
 *
 * These operator strings are mapped to a D4FilterClause code:
 *   LESS "<"
 *   GREATER ">"
 *   LESS_EQUAL "<="
 *   GREATER_EQUAL ">="
 *   EQUAL "=="
 *   NOT_EQUAL "!="
 *   REGEX_MATCH "~="
 *
 * Any other string is rejected with an exception. That includes these, which
 * the original author's notes list as also being returned by the parser:
 *   LESS_BBOX "<<"
 *   GREATER_BBOX ">>"
 *   MASK "@="
 *   ND "ND"
 *
 * @param op The operator text.
 * @return The matching D4FilterClause::ops value.
 * @exception Error (malformed_expr) if the operator is not one of those mapped.
 */
static D4FilterClause::ops get_op_code(const std::string &op)
{
    DBGN(cerr << "Entering " << __PRETTY_FUNCTION__ << endl << "op: " << op << endl);

    // Operator text paired with its filter clause code.
    static const struct {
        const char *token;
        D4FilterClause::ops code;
    } relops[] = {
        { "<", D4FilterClause::less },
        { ">", D4FilterClause::greater },
        { "<=", D4FilterClause::less_equal },
        { ">=", D4FilterClause::greater_equal },
        { "==", D4FilterClause::equal },
        { "!=", D4FilterClause::not_equal },
        { "~=", D4FilterClause::match }
    };

    for (size_t k = 0; k < sizeof(relops) / sizeof(relops[0]); ++k) {
        if (op == relops[k].token)
            return relops[k].code;
    }

    throw Error(malformed_expr, "The opertator '" + op + "' is not supported.");
}

/**
 * @brief Add a D4FilterClause
 *
 * This method adds a filter clause to the D4Sequence that is on the top of the
 * parser's stack. If there is not a D4Sequence on the stack, an exception is
 * thrown. Similarly, if the filter clause parameters are not valid, then an
 * exception is thrown.
 *
 * Filter clause rules: One of the parameters must be a variable in a D4Sequence
 * and the other must be a constant. The operator must be one of the valid relops.
 * Note that the D4FilterClause objects use the same numerical codes as the DAP2
 * parser/evaluator.
 *
 * @note The parser will have pushed the Sequence onto the BaseType stack during
 * the parse, so variables can be looked up using the top_basetype() (which
 * must be a D4Sequence).
 *
 * @param arg1 The first argument; a D4Sequence variable or a constant.
 * @param arg2 The second argument; a D4Sequence variable or a constant.
 * @param op The infix relop
 */
void D4ConstraintEvaluator::add_filter_clause(const std::string &op, const std::string &arg1, const std::string &arg2)
{
    DBG(cerr << "Entering: " << __PRETTY_FUNCTION__ << endl);

    // Check that there really is a D4Sequence associated with this filter clause.
    D4Sequence *s = dynamic_cast<D4Sequence*>(top_basetype());
    if (!s)
        throw Error(malformed_expr,
            "When a filter expression is used, it must be bound to a Sequence variable: " + expr_msg(op, arg1, arg2));

    DBG(cerr << "s->name(): " << s->name() << endl);

    // Check that arg1 and 2 are valid
    BaseType *a1 = s->var(arg1);
    BaseType *a2 = s->var(arg2);
    DBG(cerr << "a1: " << a1 << ", a2: " << a2 << endl);

    if (a1 && a2)
        throw Error(malformed_expr,
            "One of the arguments in a filter expression must be a constant: " + expr_msg(op, arg1, arg2));
    if (!(a1 || a2))
        throw Error(malformed_expr,
            "One of the arguments in a filter expression must be a variable in a Sequence: "
                + expr_msg(op, arg1, arg2));

    // Now we know a1 XOR a2 is true
    if (a1) {
        s->clauses().add_clause(new D4FilterClause(get_op_code(op), new D4RValue(a1), D4RValueFactory(arg2)));
    }
    else {
        s->clauses().add_clause(new D4FilterClause(get_op_code(op), D4RValueFactory(arg1), new D4RValue(a2)));
    }
}

/**
 * @brief If the string has surrounding quotes, remove them.
 *
 * The string is changed only when it is at least two characters long and both
 * its first and last characters are a double quote. An empty string, or a
 * string that is a single double quote character, is returned unchanged.
 *
 * @param s The source string, passed by reference and modified in place
 * @return A reference to the string parameter.
 */
string &
D4ConstraintEvaluator::remove_quotes(string &s)
{
    const string::size_type len = s.size();

    // The length test keeps the element accesses in range and stops a lone
    // quote character from being treated as both the opening and closing quote.
    if (len >= 2 && s[0] == '\"' && s[len - 1] == '\"') {
        s.erase(len - 1, 1);    // closing quote first, so index 0 is unaffected
        s.erase(0, 1);          // then the opening quote
    }

    return s;
}

// This method is called from the parser (see d4_ce_parser.yy, down in the code
// section). This will be called during the call to D4CEParser::parse(), that
// is inside D4ConstraintEvaluator::parse(...)
//
// Including the value passed in for 'l' allows the CE text to leak into
// the error message, a potential XSS attack vector. jhrg 4/15/20
void D4ConstraintEvaluator::error(const libdap::location &, const std::string &m)
{
    ostringstream oss;
#if PREVENT_XXS_VIA_CE
    oss << "Constraint expression parse error: " << m << ends;
#else
    oss << l << ": " << m << ends;
#endif
    throw Error(malformed_expr, oss.str());
}

} /* namespace libdap */