File: __init__.py (python-elasticsearch 9.1.0-1)

#  Licensed to Elasticsearch B.V. under one or more contributor
#  license agreements. See the NOTICE file distributed with
#  this work for additional information regarding copyright
#  ownership. Elasticsearch B.V. licenses this file to you under
#  the Apache License, Version 2.0 (the "License"); you may
#  not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
# 	http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing,
#  software distributed under the License is distributed on an
#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
#  KIND, either express or implied.  See the License for the
#  specific language governing permissions and limitations
#  under the License.

from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Generic,
    Iterator,
    List,
    Mapping,
    Optional,
    Sequence,
    Tuple,
    Union,
    cast,
)

from ..utils import _R, AttrDict, AttrList, _wrap
from .hit import Hit, HitMeta

if TYPE_CHECKING:
    from .. import types
    from ..aggs import Agg
    from ..faceted_search_base import FacetedSearchBase
    from ..search_base import Request, SearchBase
    from ..update_by_query_base import UpdateByQueryBase

__all__ = [
    "Response",
    "AggResponse",
    "UpdateByQueryResponse",
    "Hit",
    "HitMeta",
    "AggregateResponseType",
]


class Response(AttrDict[Any], Generic[_R]):
    """An Elasticsearch search response.

    :arg took: (required) The number of milliseconds it took Elasticsearch
        to run the request. This value is calculated by measuring the time
        elapsed between receipt of a request on the coordinating node and
        the time at which the coordinating node is ready to send the
        response. It includes:

        * Communication time between the coordinating node and data nodes
        * Time the request spends in the search thread pool, queued for
          execution
        * Actual run time

        It does not include:

        * Time needed to send the request to Elasticsearch
        * Time needed to serialize the JSON response
        * Time needed to send the response to a client
    :arg timed_out: (required) If `true`, the request timed out before
        completion; returned results may be partial or empty.
    :arg _shards: (required) A count of shards used for the request.
    :arg hits: search results
    :arg aggregations: aggregation results
    :arg _clusters:
    :arg fields:
    :arg max_score:
    :arg num_reduce_phases:
    :arg profile:
    :arg pit_id:
    :arg _scroll_id: The identifier for the search and its search context.
        You can use this scroll ID with the scroll API to retrieve the
        next batch of search results for the request. This property is
        returned only if the `scroll` query parameter is specified in the
        request.
    :arg suggest:
    :arg terminated_early:
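
    A minimal usage sketch (illustrative only; the ``blogs`` index and the
    ``title`` field are assumptions)::

        s = Search(index="blogs").query("match", title="python")
        response = s.execute()

        if response.success():
            print(f"took {response.took}ms, {len(response)} hits on this page")
            for hit in response:
                print(hit.meta.score, hit.title)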
    """

    _search: "SearchBase[_R]"
    _faceted_search: "FacetedSearchBase[_R]"
    _doc_class: Optional[_R]
    _hits: List[_R]

    took: int
    timed_out: bool
    _shards: "types.ShardStatistics"
    _clusters: "types.ClusterStatistics"
    fields: Mapping[str, Any]
    max_score: float
    num_reduce_phases: int
    profile: "types.Profile"
    pit_id: str
    _scroll_id: str
    suggest: Mapping[
        str,
        Sequence[
            Union["types.CompletionSuggest", "types.PhraseSuggest", "types.TermSuggest"]
        ],
    ]
    terminated_early: bool

    def __init__(
        self,
        search: "Request[_R]",
        response: Dict[str, Any],
        doc_class: Optional[_R] = None,
    ):
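        # bypass AttrDict's __setattr__ so these bookkeeping attributes are
        # stored on the instance itself rather than inside the wrapped
        # ``_d_`` response dict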
        super(AttrDict, self).__setattr__("_search", search)
        super(AttrDict, self).__setattr__("_doc_class", doc_class)
        super().__init__(response)

    def __iter__(self) -> Iterator[_R]:  # type: ignore[override]
        return iter(self.hits)

    def __getitem__(self, key: Union[slice, int, str]) -> Any:
        if isinstance(key, (slice, int)):
            # for slicing etc
            return self.hits[key]
        return super().__getitem__(key)

    def __bool__(self) -> bool:
        return bool(self.hits)

    def __repr__(self) -> str:
        return "<Response: %r>" % (self.hits or self.aggregations)

    def __len__(self) -> int:
        return len(self.hits)

    def __getstate__(self) -> Tuple[Dict[str, Any], "Request[_R]", Optional[_R]]:  # type: ignore[override]
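        # _search and _doc_class live outside ``_d_``, so pickling must carry
        # them explicitly alongside the raw response dict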
        return self._d_, self._search, self._doc_class

    def __setstate__(
        self, state: Tuple[Dict[str, Any], "Request[_R]", Optional[_R]]  # type: ignore[override]
    ) -> None:
        super(AttrDict, self).__setattr__("_d_", state[0])
        super(AttrDict, self).__setattr__("_search", state[1])
        super(AttrDict, self).__setattr__("_doc_class", state[2])

    def success(self) -> bool:
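        # every shard reported success and the request did not time out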
        return self._shards.total == self._shards.successful and not self.timed_out

    @property
    def hits(self) -> List[_R]:
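        # hits are parsed into result objects lazily on first access, then
        # cached for subsequent accesses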
        if not hasattr(self, "_hits"):
            h = cast(AttrDict[Any], self._d_["hits"])

            try:
                hits = AttrList(list(map(self._search._get_result, h["hits"])))
            except AttributeError as e:
                # avoid raising AttributeError since it will be hidden by the property
                raise TypeError("Could not parse hits.", e)

            # avoid assigning _hits into self._d_
            super(AttrDict, self).__setattr__("_hits", hits)
            for k in h:
                setattr(self._hits, k, _wrap(h[k]))
        return self._hits

    @property
    def aggregations(self) -> "AggResponse[_R]":
        return self.aggs

    @property
    def aggs(self) -> "AggResponse[_R]":
        if not hasattr(self, "_aggs"):
            aggs = AggResponse[_R](
                cast("Agg[_R]", self._search.aggs),
                self._search,
                cast(Dict[str, Any], self._d_.get("aggregations", {})),
            )

            # avoid assigning _aggs into self._d_
            super(AttrDict, self).__setattr__("_aggs", aggs)
        return cast("AggResponse[_R]", self._aggs)

    def search_after(self) -> "SearchBase[_R]":
        """
        Return a ``Search`` instance that retrieves the next page of results.

        This method provides an easy way to paginate a long list of results using
        the ``search_after`` option. For example::

            page_size = 20
            s = Search()[:page_size].sort("date")

            while True:
                # get a page of results
                r = s.execute()  # or ``await s.execute()`` with the async client

                # do something with this page of results

                # exit the loop if we reached the end
                if len(r.hits) < page_size:
                    break

                # get a search object with the next page of results
                s = r.search_after()

        Note that the ``search_after`` option requires the search to have an
        explicit ``sort`` order.
        """
        if len(self.hits) == 0:
            raise ValueError("Cannot use search_after when there are no search results")
        if not hasattr(self.hits[-1].meta, "sort"):  # type: ignore[attr-defined]
            raise ValueError("Cannot use search_after when results are not sorted")
        return self._search.extra(search_after=self.hits[-1].meta.sort)  # type: ignore[attr-defined]


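# The union of every concrete aggregation result type that
# ``AggResponse.__getitem__`` can return, kept as a single alias so that
# annotations elsewhere stay readable.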
AggregateResponseType = Union[
    "types.CardinalityAggregate",
    "types.HdrPercentilesAggregate",
    "types.HdrPercentileRanksAggregate",
    "types.TDigestPercentilesAggregate",
    "types.TDigestPercentileRanksAggregate",
    "types.PercentilesBucketAggregate",
    "types.MedianAbsoluteDeviationAggregate",
    "types.MinAggregate",
    "types.MaxAggregate",
    "types.SumAggregate",
    "types.AvgAggregate",
    "types.WeightedAvgAggregate",
    "types.ValueCountAggregate",
    "types.SimpleValueAggregate",
    "types.DerivativeAggregate",
    "types.BucketMetricValueAggregate",
    "types.StatsAggregate",
    "types.StatsBucketAggregate",
    "types.ExtendedStatsAggregate",
    "types.ExtendedStatsBucketAggregate",
    "types.GeoBoundsAggregate",
    "types.GeoCentroidAggregate",
    "types.HistogramAggregate",
    "types.DateHistogramAggregate",
    "types.AutoDateHistogramAggregate",
    "types.VariableWidthHistogramAggregate",
    "types.StringTermsAggregate",
    "types.LongTermsAggregate",
    "types.DoubleTermsAggregate",
    "types.UnmappedTermsAggregate",
    "types.LongRareTermsAggregate",
    "types.StringRareTermsAggregate",
    "types.UnmappedRareTermsAggregate",
    "types.MultiTermsAggregate",
    "types.MissingAggregate",
    "types.NestedAggregate",
    "types.ReverseNestedAggregate",
    "types.GlobalAggregate",
    "types.FilterAggregate",
    "types.ChildrenAggregate",
    "types.ParentAggregate",
    "types.SamplerAggregate",
    "types.UnmappedSamplerAggregate",
    "types.GeoHashGridAggregate",
    "types.GeoTileGridAggregate",
    "types.GeoHexGridAggregate",
    "types.RangeAggregate",
    "types.DateRangeAggregate",
    "types.GeoDistanceAggregate",
    "types.IpRangeAggregate",
    "types.IpPrefixAggregate",
    "types.FiltersAggregate",
    "types.AdjacencyMatrixAggregate",
    "types.SignificantLongTermsAggregate",
    "types.SignificantStringTermsAggregate",
    "types.UnmappedSignificantTermsAggregate",
    "types.CompositeAggregate",
    "types.FrequentItemSetsAggregate",
    "types.TimeSeriesAggregate",
    "types.ScriptedMetricAggregate",
    "types.TopHitsAggregate",
    "types.InferenceAggregate",
    "types.StringStatsAggregate",
    "types.BoxPlotAggregate",
    "types.TopMetricsAggregate",
    "types.TTestAggregate",
    "types.RateAggregate",
    "types.CumulativeCardinalityAggregate",
    "types.MatrixStatsAggregate",
    "types.GeoLineAggregate",
]


class AggResponse(AttrDict[Any], Generic[_R]):
    """An Elasticsearch aggregation response."""

    _meta: Dict[str, Any]

    def __init__(self, aggs: "Agg[_R]", search: "Request[_R]", data: Dict[str, Any]):
        super(AttrDict, self).__setattr__("_meta", {"search": search, "aggs": aggs})
        super().__init__(data)

    def __getitem__(self, attr_name: str) -> AggregateResponseType:
        if attr_name in self._meta["aggs"]:
            # don't do self._meta['aggs'][attr_name] to avoid copying
            agg = self._meta["aggs"].aggs[attr_name]
            return cast(
                AggregateResponseType,
                agg.result(self._meta["search"], self._d_[attr_name]),
            )
        return super().__getitem__(attr_name)  # type: ignore[no-any-return]

    def __iter__(self) -> Iterator[AggregateResponseType]:  # type: ignore[override]
        for name in self._meta["aggs"]:
            yield self[name]


class UpdateByQueryResponse(AttrDict[Any], Generic[_R]):
    """An Elasticsearch update by query response.

    :arg batches: The number of scroll responses pulled back by the update
        by query.
    :arg failures: Array of failures if there were any unrecoverable
        errors during the process. If this is non-empty then the request
        ended because of those failures. Update by query is implemented
        using batches. Any failure causes the entire process to end, but
        all failures in the current batch are collected into the array.
        You can use the `conflicts` option to prevent the operation from
        ending when version conflicts occur.
    :arg noops: The number of documents that were ignored because the
        script used for the update by query returned a noop value for
        `ctx.op`.
    :arg deleted: The number of documents that were successfully deleted.
    :arg requests_per_second: The number of requests per second
        effectively run during the update by query.
    :arg retries: The number of retries attempted by update by query.
        `bulk` is the number of bulk actions retried. `search` is the
        number of search actions retried.
    :arg task:
    :arg timed_out: If true, some requests timed out during the update by
        query.
    :arg took: The number of milliseconds from start to end of the whole
        operation.
    :arg total: The number of documents that were successfully processed.
    :arg updated: The number of documents that were successfully updated.
    :arg version_conflicts: The number of version conflicts that the
        update by query hit.
    :arg throttled:
    :arg throttled_millis: The number of milliseconds the request slept to
        conform to `requests_per_second`.
    :arg throttled_until:
    :arg throttled_until_millis: This field should always be equal to zero
        in an _update_by_query response. It only has meaning when using
        the task API, where it indicates the next time (in milliseconds
        since epoch) a throttled request will be run again in order to
        conform to `requests_per_second`.
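
    A minimal usage sketch (illustrative only; the ``blogs`` index and the
    ``published`` field are assumptions)::

        ubq = UpdateByQuery(index="blogs").query("term", published=False)
        response = ubq.execute()

        if response.success():
            print(f"updated {response.updated} of {response.total} documents")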
    """

    _search: "UpdateByQueryBase[_R]"

    batches: int
    failures: Sequence["types.BulkIndexByScrollFailure"]
    noops: int
    deleted: int
    requests_per_second: float
    retries: "types.Retries"
    task: str
    timed_out: bool
    took: Any
    total: int
    updated: int
    version_conflicts: int
    throttled: Any
    throttled_millis: Any
    throttled_until: Any
    throttled_until_millis: Any

    def __init__(
        self,
        search: "Request[_R]",
        response: Dict[str, Any],
        doc_class: Optional[_R] = None,
    ):
        super(AttrDict, self).__setattr__("_search", search)
        super(AttrDict, self).__setattr__("_doc_class", doc_class)
        super().__init__(response)

    def success(self) -> bool:
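        # successful if the request did not time out and no bulk failures
        # were recorded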
        return not self.timed_out and not self.failures