File: api.py

package info (click to toggle)
python-efilter 1.5-2.1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye
  • size: 596 kB
  • sloc: python: 4,342; makefile: 51
file content (255 lines) | stat: -rw-r--r-- 9,126 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# EFILTER Forensic Query Language
#
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
EFILTER convenience API.
"""

__author__ = "Adam Sindelar <adamsh@google.com>"


from efilter import query as q
from efilter import scope

from efilter.protocols import repeated

from efilter.transforms import solve
from efilter.transforms import infer_type

from efilter.stdlib import core as std_core


def apply(query, replacements=None, vars=None, allow_io=False,
          libs=("stdcore", "stdmath")):
    """Run 'query' on 'vars' and return the result(s).

    Arguments:
        query: A query object or string with the query.
        replacements: Build-time parameters to the query, either as dict or
            as an array (for positional interpolation).
        vars: The variables to be supplied to the query solver.
        allow_io: (Default: False) Include 'stdio' and allow IO functions.
        libs: Iterable of library modules to include, given as strings.
            Default: ('stdcore', 'stdmath')
            For full list of bundled libraries, see efilter.stdlib.

            Note: 'stdcore' must always be included.

            WARNING: Including 'stdio' must be done in conjunction with
                'allow_io'. This is to make enabling IO explicit. 'allow_io'
                implies that 'stdio' should be included and so adding it to
                libs is actually not required.

    Notes on IO: If allow_io is set to True then 'stdio' will be included and
    the EFILTER query will be allowed to read files from disk. Use this with
    caution.

        If the query returns a lazily-evaluated result that depends on reading
        from a file (for example, filtering a CSV file) then the file
        descriptor will remain open until the returned result is deallocated.
        The caller is responsible for releasing the result when it's no longer
        needed.

    Returns:
        The result of evaluating the query. The type of the output will depend
        on the query, and can be predicted using 'infer' (provided reflection
        callbacks are implemented). In the common case of a SELECT query the
        return value will be an iterable of filtered data (actually an object
        implementing IRepeated, as well as __iter__.)

    A word on cardinality of the return value:
        Types in EFILTER always refer to a scalar. If apply returns more than
        one value, the type returned by 'infer' will refer to the type of
        the value inside the returned container.

        If you're unsure whether your query returns one or more values (rows),
        use the 'getvalues' function.

    Raises:
        efilter.errors.EfilterError if there are issues with the query.
        ValueError if 'libs' names an unknown library module, includes
            'stdio' without allow_io=True, or omits 'stdcore'.

    Examples:
        apply("5 + 5") # -> 10

        apply("SELECT * FROM people WHERE age > 10",
              vars={"people":({"age": 10, "name": "Bob"},
                              {"age": 20, "name": "Alice"},
                              {"age": 30, "name": "Eve"}))

        # This will replace the question mark (?) with the string "Bob" in a
        # safe manner, preventing SQL injection.
        apply("SELECT * FROM people WHERE name = ?", replacements=["Bob"], ...)
    """
    if vars is None:
        vars = {}

    # Work on a private copy: the caller's sequence is never mutated and
    # one-shot iterables are consumed exactly once.
    libs = list(libs)
    if allow_io and "stdio" not in libs:
        # 'allow_io' implies 'stdio'; don't stack it twice if the caller
        # already listed it explicitly.
        libs.append("stdio")

    query = q.Query(query, params=replacements)

    stdcore_included = False
    for lib in libs:
        if lib == "stdcore":
            stdcore_included = True
            # 'solve' always includes this automatically - we don't have a say
            # in the matter.
            continue

        if lib == "stdio" and not allow_io:
            raise ValueError("Attempting to include 'stdio' but IO not "
                             "enabled. Pass allow_io=True.")

        module = std_core.LibraryModule.ALL_MODULES.get(lib)
        # Check the looked-up module (not the name): an unknown library name
        # yields None here and must be rejected before it reaches the scope.
        if not module:
            raise ValueError(
                "There is no standard library module %r." % (lib,))
        vars = scope.ScopeStack(module, vars)

    if not stdcore_included:
        raise ValueError("EFILTER cannot work without standard lib 'stdcore'.")

    return solve.solve(query, vars).value


def getvalues(result):
    """Normalize the output of 'apply' into something you can iterate over.

    Depending on the query, 'apply' may hand back a single scalar or a
    collection of them. When you don't know which one you have, pass the
    result through this function; it simply delegates to
    efilter.protocols.repeated.getvalues.

    Arguments:
        result: Anything. If it's an instance of IRepeated, all of its values
            will be returned.

    Returns:
        An iterator of at least one element.
    """
    values = repeated.getvalues(result)
    return values


def user_func(func, arg_types=None, return_type=None):
    """Wrap the Python callable 'func' so that EFILTER queries may call it.

    EFILTER refuses, as a security precaution, to execute Python callables
    that do not implement the IApplicative protocol. The standard library
    ships an implementation of that protocol (TypedFunction), so this helper
    declares a one-off subclass of it that forwards calls to 'func' and
    returns an instance of that subclass.

    Arguments:
        func: A Python callable that will serve as the implementation.
        arg_types (optional): A tuple of argument types. If the function takes
            keyword arguments, they must still have a defined order.
        return_type (optional): The type the function returns.

    Returns:
        An instance of a custom subclass of efilter.stdlib.core.TypedFunction.

    Examples:
        def my_callback(tag):
            print("I got %r" % tag)

        api.apply("if True then my_callback('Hello World!')",
                  vars={
                    "my_callback": api.user_func(my_callback)
                  })

        # This should print "I got 'Hello World!'".
    """
    class UserFunction(std_core.TypedFunction):
        # The wrapper is exposed under the wrapped callable's own name.
        name = func.__name__

        @classmethod
        def reflect_static_return(cls):
            # Report the return type supplied to user_func.
            return return_type

        @classmethod
        def reflect_static_args(cls):
            # Report the argument types supplied to user_func.
            return arg_types

        def __call__(self, *positional, **named):
            # Forward the call unchanged to the wrapped callable.
            return func(*positional, **named)

    wrapper = UserFunction()
    return wrapper


def infer(query, replacements=None, root_type=None,
          libs=("stdcore", "stdmath")):
    """Work out what type a query would produce, without executing it.

    Arguments:
        query: A query object or string with the query.
        replacements: Build-time parameters to the query, either as dict or as
            an array (for positional interpolation).
        root_type: The types of variables to be supplied to the query
            inference.
        libs: Names of the standard libraries that should be taken into
            account for the inference. Must contain 'stdcore'.

    Returns:
        The type of the query's output, if it can be determined. If
        undecidable, returns efilter.protocol.AnyType.

        NOTE: The inferred type describes a single row of the results, not the
        Python object 'apply' would return. A query producing many rows of
        integers is therefore inferred as int, not as a collection.

    Raises:
        TypeError: if 'libs' names an unknown library module or does not
            include 'stdcore'.

    Examples:
        infer("5 + 5") # -> INumber

        infer("SELECT * FROM people WHERE age > 10") # -> AnyType

        # If root_type implements the IStructured reflection API:
        infer("SELECT * FROM people WHERE age > 10", root_type=...) # -> dict
    """
    # stdcore is always the first scope; root_type joins it only when given.
    base_scopes = (std_core.MODULE, root_type) if root_type else (
        std_core.MODULE,)
    type_scope = scope.ScopeStack(*base_scopes)

    saw_stdcore = False
    for lib_name in libs:
        if lib_name != "stdcore":
            lib_module = std_core.LibraryModule.ALL_MODULES.get(lib_name)
            if not lib_module:
                raise TypeError("No standard library module %r." % lib_name)

            # Stack each extra library on top of what has been built so far.
            type_scope = scope.ScopeStack(lib_module, type_scope)
        else:
            # Already at the base of the stack; just record that it was
            # requested.
            saw_stdcore = True

    if not saw_stdcore:
        raise TypeError("'stdcore' must always be included.")

    parsed = q.Query(query, params=replacements)
    return infer_type.infer_type(parsed, type_scope)


def search(query, data, replacements=None):
    """Lazily yield each entry of 'data' for which 'query' is truthy.

    Arguments:
        query: A query object or string with the query.
        data: Iterable of entries; each entry is passed to the solver as the
            variables the query is evaluated against.
        replacements: Build-time parameters to the query.

    Yields:
        The entries of 'data', in order, whose solved value is truthy.
    """
    compiled = q.Query(query, params=replacements)
    for candidate in data:
        matched = solve.solve(compiled, candidate).value
        if not matched:
            continue

        yield candidate