File: layer1.py

package info (click to toggle)
python-boto 2.49.0-4.1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 9,888 kB
  • sloc: python: 86,396; makefile: 112
file content (540 lines) | stat: -rw-r--r-- 29,110 bytes parent folder | download | duplicates (12)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
# Copyright (c) 2014 Amazon.com, Inc. or its affiliates.  All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
from boto.compat import json
from boto.exception import JSONResponseError
from boto.connection import AWSAuthConnection
from boto.regioninfo import RegionInfo
from boto.cloudsearchdomain import exceptions


class CloudSearchDomainConnection(AWSAuthConnection):
    """
    You use the AmazonCloudSearch2013 API to upload documents to a
    search domain and search those documents.

    The endpoints for submitting `UploadDocuments`, `Search`, and
    `Suggest` requests are domain-specific. To get the endpoints for
    your domain, use the Amazon CloudSearch configuration service
    `DescribeDomains` action. The domain endpoints are also displayed
    on the domain dashboard in the Amazon CloudSearch console. You
    submit suggest requests to the search endpoint.

    For more information, see the `Amazon CloudSearch Developer
    Guide`_.
    """
    # API version date. The request methods of this class hard-code the same
    # date in their URIs ('/2013-01-01/...') rather than reading this value.
    APIVersion = "2013-01-01"
    # NOTE(review): presumably the service name used when signing requests;
    # it is consumed outside this class -- confirm against AWSAuthConnection.
    AuthServiceName = 'cloudsearch'
    # Name and endpoint of the RegionInfo that __init__ builds when the
    # caller does not supply a region.
    DefaultRegionName = "us-east-1"
    DefaultRegionEndpoint = "cloudsearch.us-east-1.amazonaws.com"
    # NOTE(review): make_request raises JSONResponseError directly instead of
    # going through this attribute.
    ResponseError = JSONResponseError

    # Fault name -> exception class.
    # NOTE(review): no method visible in this class reads this mapping.
    _faults = {
        "SearchException": exceptions.SearchException,
        "DocumentServiceException": exceptions.DocumentServiceException,
    }

    def __init__(self, **kwargs):
        """
        Create a connection bound to one CloudSearch domain endpoint.

        :type region: RegionInfo
        :param region: Optional keyword argument. Region to record on the
            connection as ``self.region``. When missing or falsy, a
            RegionInfo is built from ``DefaultRegionName`` and
            ``DefaultRegionEndpoint``. It is never forwarded to the parent
            constructor.

        :type host: string
        :param host: Required keyword argument. The endpoint of the
            specific domain that requests will be sent to.

        All remaining keyword arguments are passed unchanged to
        ``AWSAuthConnection.__init__``.

        :raises ValueError: if ``host`` is missing or None.
        """
        # pop() rather than get(): 'region' must be removed from kwargs in
        # every case, including an explicit region=None, so that it is not
        # handed to the parent constructor below.
        region = kwargs.pop('region', None)
        if not region:
            region = RegionInfo(self, self.DefaultRegionName,
                                self.DefaultRegionEndpoint)
        if kwargs.get('host', None) is None:
            raise ValueError(
                'The argument, host, must be provided when creating a '
                'CloudSearchDomainConnection because its methods require the '
                'specific domain\'s endpoint in order to successfully make '
                'requests to that CloudSearch Domain.'
            )
        super(CloudSearchDomainConnection, self).__init__(**kwargs)
        self.region = region
    
    def _required_auth_capability(self):
        """Return the list of auth capabilities this connection requires."""
        capabilities = ['hmac-v4']
        return capabilities

    def search(self, query, cursor=None, expr=None, facet=None,
               filter_query=None, highlight=None, partial=None,
               query_options=None, query_parser=None, ret=None, size=None,
               sort=None, start=None):
        """
        Retrieves a list of documents that match the specified search
        criteria. How you specify the search criteria depends on which
        query parser you use. Amazon CloudSearch supports four query
        parsers:

        + `simple`: search all `text` and `text-array` fields for the
          specified string. Search for phrases, individual terms, and
          prefixes.
        + `structured`: search specific fields, construct compound
          queries using Boolean operators, and use advanced features
          such as term boosting and proximity searching.
        + `lucene`: specify search criteria using the Apache Lucene
          query parser syntax.
        + `dismax`: specify search criteria using the simplified
          subset of the Apache Lucene query parser syntax defined by the
          DisMax query parser.

        For more information, see `Searching Your Data`_ in the Amazon
        CloudSearch Developer Guide.

        The endpoint for submitting `Search` requests is domain-
        specific. You submit search requests to a domain's search
        endpoint. To get the search endpoint for your domain, use the
        Amazon CloudSearch configuration service `DescribeDomains`
        action. A domain's endpoints are also displayed on the domain
        dashboard in the Amazon CloudSearch console.

        The request is issued as a POST to ``/2013-01-01/search``. Every
        argument whose value is not None is sent as a request parameter;
        arguments left as None are omitted.

        :type query: string
        :param query: Specifies the search criteria for the request. How you
            specify the search criteria depends on the query parser used for
            the request and the parser options specified in the `queryOptions`
            parameter. By default, the `simple` query parser is used to process
            requests. To use the `structured`, `lucene`, or `dismax` query
            parser, you must also specify the `queryParser` parameter.
            For more information about specifying search criteria, see
            `Searching Your Data`_ in the Amazon CloudSearch Developer Guide.
            Sent as the `q` parameter.

        :type cursor: string
        :param cursor: Retrieves a cursor value you can use to page through
            large result sets. Use the `size` parameter to control the number
            of hits to include in each response. You can specify either the
            `cursor` or `start` parameter in a request; they are mutually
            exclusive. To get the first cursor, set the cursor value to
            `initial`. In subsequent requests, specify the cursor value
            returned in the hits section of the response.
            For more information, see `Paginating Results`_ in the Amazon
            CloudSearch Developer Guide.

        :type expr: string
        :param expr: Defines one or more numeric expressions that can be used
            to sort results or specify search or filter criteria. You can also
            specify expressions as return fields.
            For more information about defining and using expressions, see
            `Configuring Expressions`_ in the Amazon CloudSearch Developer
            Guide.

        :type facet: string
        :param facet: Specifies one or more fields for which to get facet
            information, and options that control how the facet information is
            returned. Each specified field must be facet-enabled in the domain
            configuration. The fields and options are specified in JSON using
            the form `{"FIELD":{"OPTION":VALUE,"OPTION":"STRING"},"FIELD":
            {"OPTION":VALUE,"OPTION":"STRING"}}`.

            You can specify the following faceting options:

            + `buckets` specifies an array of the facet values or ranges to
              count. Ranges are specified using the same syntax that you use
              to search for a range of values. For more information, see
              `Searching for a Range of Values`_ in the Amazon CloudSearch
              Developer Guide. Buckets are returned in the order they are
              specified in the request. The `sort` and `size` options are not
              valid if you specify `buckets`.
            + `size` specifies the maximum number of facets to include in the
              results. By default, Amazon CloudSearch returns counts for the
              top 10. The `size` parameter is only valid when you specify the
              `sort` option; it cannot be used in conjunction with `buckets`.
            + `sort` specifies how you want to sort the facets in the results:
              `bucket` or `count`. Specify `bucket` to sort alphabetically or
              numerically by facet value (in ascending order). Specify `count`
              to sort by the facet counts computed for each facet value (in
              descending order). To retrieve facet counts for particular
              values or ranges of values, use the `buckets` option instead of
              `sort`.

            If no facet options are specified, facet counts are computed for
            all field values, the facets are sorted by facet count, and the
            top 10 facets are returned in the results.

            For more information, see `Getting and Using Facet Information`_
            in the Amazon CloudSearch Developer Guide.

        :type filter_query: string
        :param filter_query: Specifies a structured query that filters the
            results of a search without affecting how the results are scored
            and sorted. You use `filterQuery` in conjunction with the `query`
            parameter to filter the documents that match the constraints
            specified in the `query` parameter. Specifying a filter controls
            only which matching documents are included in the results; it has
            no effect on how they are scored and sorted. The `filterQuery`
            parameter supports the full structured query syntax.
            For more information about using filters, see `Filtering Matching
            Documents`_ in the Amazon CloudSearch Developer Guide.
            Sent as the `fq` parameter.

        :type highlight: string
        :param highlight: Retrieves highlights for matches in the specified
            `text` or `text-array` fields. Each specified field must be
            highlight enabled in the domain configuration. The fields and
            options are specified in JSON using the form
            `{"FIELD":{"OPTION":VALUE,"OPTION":"STRING"},"FIELD":
            {"OPTION":VALUE,"OPTION":"STRING"}}`.

            You can specify the following highlight options:

            + `format`: specifies the format of the data in the text field:
              `text` or `html`. When data is returned as HTML, all
              non-alphanumeric characters are encoded. The default is `html`.
            + `max_phrases`: specifies the maximum number of occurrences of
              the search term(s) you want to highlight. By default, the first
              occurrence is highlighted.
            + `pre_tag`: specifies the string to prepend to an occurrence of a
              search term. The default for HTML highlights is `<em>`. The
              default for text highlights is `*`.
            + `post_tag`: specifies the string to append to an occurrence of a
              search term. The default for HTML highlights is `</em>`. The
              default for text highlights is `*`.

            If no highlight options are specified for a field, the returned
            field text is treated as HTML and the first match is highlighted
            with emphasis tags: `<em>search-term</em>`.

        :type partial: boolean
        :param partial: Enables partial results to be returned if one or more
            index partitions are unavailable. When your search index is
            partitioned across multiple search instances, by default Amazon
            CloudSearch only returns results if every partition can be queried.
            This means that the failure of a single search instance can result
            in 5xx (internal server) errors. When you enable partial results,
            Amazon CloudSearch returns whatever results are available and
            includes the percentage of documents searched in the search results
            (percent-searched). This enables you to more gracefully degrade
            your users' search experience. For example, rather than displaying
            no results, you could display the partial results and a message
            indicating that the results might be incomplete due to a temporary
            system outage.

        :type query_options: string
        :param query_options: Configures options for the query parser
            specified in the `queryParser` parameter. Sent as the `q.options`
            parameter.

            The options you can configure vary according to which parser you
            use:

            + `defaultOperator`: The default operator used to combine
              individual terms in the search string. For example:
              `defaultOperator: 'or'`. For the `dismax` parser, you specify a
              percentage that represents the percentage of terms in the search
              string (rounded down) that must match, rather than a default
              operator. A value of `0%` is the equivalent to OR, and a value
              of `100%` is equivalent to AND. The percentage must be specified
              as a value in the range 0-100 followed by the percent (%)
              symbol. For example, `defaultOperator: 50%`. Valid values:
              `and`, `or`, a percentage in the range 0%-100% (`dismax`).
              Default: `and` (`simple`, `structured`, `lucene`) or `100`
              (`dismax`). Valid for: `simple`, `structured`, `lucene`, and
              `dismax`.
            + `fields`: An array of the fields to search when no fields are
              specified in a search. If no fields are specified in a search
              and this option is not specified, all text and text-array fields
              are searched. You can specify a weight for each field to control
              the relative importance of each field when Amazon CloudSearch
              calculates relevance scores. To specify a field weight, append a
              caret (`^`) symbol and the weight to the field name. For
              example, to boost the importance of the `title` field over the
              `description` field you could specify:
              `"fields":["title^5","description"]`. Valid values: The name of
              any configured field and an optional numeric value greater than
              zero. Default: All `text` and `text-array` fields. Valid for:
              `simple`, `structured`, `lucene`, and `dismax`.
            + `operators`: An array of the operators or special characters you
              want to disable for the simple query parser. If you disable the
              `and`, `or`, or `not` operators, the corresponding operators
              (`+`, `|`, `-`) have no special meaning and are dropped from the
              search string. Similarly, disabling `prefix` disables the
              wildcard operator (`*`) and disabling `phrase` disables the
              ability to search for phrases by enclosing phrases in double
              quotes. Disabling precedence disables the ability to control
              order of precedence using parentheses. Disabling `near` disables
              the ability to use the ~ operator to perform a sloppy phrase
              search. Disabling the `fuzzy` operator disables the ability to
              use the ~ operator to perform a fuzzy search. `escape` disables
              the ability to use a backslash (`\\`) to escape special
              characters within the search string. Disabling whitespace is an
              advanced option that prevents the parser from tokenizing on
              whitespace, which can be useful for Vietnamese. (It prevents
              Vietnamese words from being split incorrectly.) For example, you
              could disable all operators other than the phrase operator to
              support just simple term and phrase queries:
              `"operators":["and","not","or", "prefix"]`. Valid values: `and`,
              `escape`, `fuzzy`, `near`, `not`, `or`, `phrase`, `precedence`,
              `prefix`, `whitespace`. Default: All operators and special
              characters are enabled. Valid for: `simple`.
            + `phraseFields`: An array of the `text` or `text-array` fields
              you want to use for phrase searches. When the terms in the
              search string appear in close proximity within a field, the
              field scores higher. You can specify a weight for each field to
              boost that score. The `phraseSlop` option controls how much the
              matches can deviate from the search string and still be boosted.
              To specify a field weight, append a caret (`^`) symbol and the
              weight to the field name. For example, to boost phrase matches
              in the `title` field over the `abstract` field, you could
              specify: `"phraseFields":["title^3", "abstract"]`. Valid values:
              The name of any `text` or `text-array` field and an optional
              numeric value greater than zero. Default: No fields. If you
              don't specify any fields with `phraseFields`, proximity scoring
              is disabled even if `phraseSlop` is specified. Valid for:
              `dismax`.
            + `phraseSlop`: An integer value that specifies how much matches
              can deviate from the search phrase and still be boosted
              according to the weights specified in the `phraseFields` option;
              for example, `phraseSlop: 2`. You must also specify
              `phraseFields` to enable proximity scoring. Valid values:
              positive integers. Default: 0. Valid for: `dismax`.
            + `explicitPhraseSlop`: An integer value that specifies how much a
              match can deviate from the search phrase when the phrase is
              enclosed in double quotes in the search string. (Phrases that
              exceed this proximity distance are not considered a match.) For
              example, to specify a slop of three for dismax phrase queries,
              you would specify `"explicitPhraseSlop":3`. Valid values:
              positive integers. Default: 0. Valid for: `dismax`.
            + `tieBreaker`: When a term in the search string is found in a
              document's field, a score is calculated for that field based on
              how common the word is in that field compared to other
              documents. If the term occurs in multiple fields within a
              document, by default only the highest scoring field contributes
              to the document's overall score. You can specify a `tieBreaker`
              value to enable the matches in lower-scoring fields to
              contribute to the document's score. That way, if two documents
              have the same max field score for a particular term, the score
              for the document that has matches in more fields will be higher.
              The formula for calculating the score with a tieBreaker is
              `(max field score) + (tieBreaker) * (sum of the scores for the
              rest of the matching fields)`. Set `tieBreaker` to 0 to
              disregard all but the highest scoring field (pure max):
              `"tieBreaker":0`. Set to 1 to sum the scores from all fields
              (pure sum): `"tieBreaker":1`. Valid values: 0.0 to 1.0. Default:
              0.0. Valid for: `dismax`.

        :type query_parser: string
        :param query_parser: Specifies which query parser to use to process
            the request. If `queryParser` is not specified, Amazon CloudSearch
            uses the `simple` query parser. Sent as the `q.parser` parameter.

            Amazon CloudSearch supports four query parsers:

            + `simple`: perform simple searches of `text` and `text-array`
              fields. By default, the `simple` query parser searches all
              `text` and `text-array` fields. You can specify which fields to
              search by with the `queryOptions` parameter. If you prefix a
              search term with a plus sign (+) documents must contain the term
              to be considered a match. (This is the default, unless you
              configure the default operator with the `queryOptions`
              parameter.) You can use the `-` (NOT), `|` (OR), and `*`
              (wildcard) operators to exclude particular terms, find results
              that match any of the specified terms, or search for a prefix.
              To search for a phrase rather than individual terms, enclose the
              phrase in double quotes. For more information, see `Searching
              for Text`_ in the Amazon CloudSearch Developer Guide.
            + `structured`: perform advanced searches by combining multiple
              expressions to define the search criteria. You can also search
              within particular fields, search for values and ranges of
              values, and use advanced options such as term boosting,
              `matchall`, and `near`. For more information, see `Constructing
              Compound Queries`_ in the Amazon CloudSearch Developer Guide.
            + `lucene`: search using the Apache Lucene query parser syntax.
              For more information, see `Apache Lucene Query Parser Syntax`_.
            + `dismax`: search using the simplified subset of the Apache
              Lucene query parser syntax defined by the DisMax query parser.
              For more information, see `DisMax Query Parser Syntax`_.

        :type ret: string
        :param ret: Specifies the field and expression values to include in
            the response. Multiple fields or expressions are specified as a
            comma-separated list. By default, a search response includes all
            return enabled fields (`_all_fields`). To return only the document
            IDs for the matching documents, specify `_no_fields`. To retrieve
            the relevance score calculated for each document, specify `_score`.
            Sent as the `return` parameter.

        :type size: long
        :param size: Specifies the maximum number of search hits to include in
            the response.

        :type sort: string
        :param sort: Specifies the fields or custom expressions to use to sort
            the search results. Multiple fields or expressions are specified as
            a comma-separated list. You must specify the sort direction (`asc`
            or `desc`) for each field; for example, `year desc,title asc`. To
            use a field to sort results, the field must be sort-enabled in the
            domain configuration. Array type fields cannot be used for sorting.
            If no `sort` parameter is specified, results are sorted by their
            default relevance scores in descending order: `_score desc`. You
            can also sort by document ID (`_id asc`) and version (`_version
            desc`).
            For more information, see `Sorting Results`_ in the Amazon
            CloudSearch Developer Guide.

        :type start: long
        :param start: Specifies the offset of the first search hit you want to
            return. Note that the result set is zero-based; the first result is
            at index 0. You can specify either the `start` or `cursor`
            parameter in a request; they are mutually exclusive.
            For more information, see `Paginating Results`_ in the Amazon
            CloudSearch Developer Guide.

        :return: The JSON-decoded response body, as returned by
            `make_request`.

        :raises JSONResponseError: raised by `make_request` when the response
            status is not 200.
        """
        uri = '/2013-01-01/search'
        # (request parameter name, argument value), listed in the order the
        # parameters are added to the request.
        # NOTE(review): values are passed through unchanged, so `partial`
        # reaches the transport layer as a Python bool -- confirm how that
        # is serialized on the wire.
        candidates = (
            ('cursor', cursor),
            ('expr', expr),
            ('facet', facet),
            ('fq', filter_query),
            ('highlight', highlight),
            ('partial', partial),
            ('q', query),
            ('q.options', query_options),
            ('q.parser', query_parser),
            ('return', ret),
            ('size', size),
            ('sort', sort),
            ('start', start),
        )
        query_params = {}
        for wire_name, value in candidates:
            # Only arguments the caller actually supplied are sent.
            if value is not None:
                query_params[wire_name] = value
        # There is no JSON payload of its own: the data argument is always
        # the serialized empty object.
        return self.make_request('POST', uri, expected_status=200,
                                 data=json.dumps({}), headers={},
                                 params=query_params)

    def suggest(self, query, suggester, size=None):
        """
        Retrieves autocomplete suggestions for a partial query string.
        Suggestions enable you to display likely matches before users
        finish typing. In Amazon CloudSearch, suggestions are based on
        the contents of a particular text field. When you request
        suggestions, Amazon CloudSearch finds all of the documents
        whose values in the suggester field start with the specified
        query string. The beginning of the field must match the query
        string to be considered a match.

        For more information about configuring suggesters and
        retrieving suggestions, see `Getting Suggestions`_ in the
        Amazon CloudSearch Developer Guide.

        The endpoint for submitting `Suggest` requests is domain-
        specific. You submit suggest requests to a domain's search
        endpoint. To get the search endpoint for your domain, use the
        Amazon CloudSearch configuration service `DescribeDomains`
        action. A domain's endpoints are also displayed on the domain
        dashboard in the Amazon CloudSearch console.

        The request is issued as a GET to ``/2013-01-01/suggest``;
        arguments whose value is None are omitted from it.

        :type query: string
        :param query: Specifies the string for which you want to get
            suggestions. Sent as the `q` parameter.

        :type suggester: string
        :param suggester: Specifies the name of the suggester to use to find
            suggested matches.

        :type size: long
        :param size: Specifies the maximum number of suggestions to return.

        :return: The JSON-decoded response body, as returned by
            `make_request`.
        """
        uri = '/2013-01-01/suggest'
        # (request parameter name, argument value) in insertion order.
        candidates = (
            ('q', query),
            ('suggester', suggester),
            ('size', size),
        )
        query_params = {}
        for wire_name, value in candidates:
            if value is not None:
                query_params[wire_name] = value
        # The data argument is always the serialized empty JSON object.
        return self.make_request('GET', uri, expected_status=200,
                                 data=json.dumps({}), headers={},
                                 params=query_params)

    def upload_documents(self, documents, content_type):
        """
        Posts a batch of documents to a search domain for indexing. A
        document batch is a collection of add and delete operations
        that represent the documents you want to add, update, or
        delete from your domain. Batches can be described in either
        JSON or XML. Each item that you want Amazon CloudSearch to
        return as a search result (such as a product) is represented
        as a document. Every document has a unique ID and one or more
        fields that contain the data that you want to search and
        return in results. Individual documents cannot contain more
        than 1 MB of data. The entire batch cannot exceed 5 MB. To get
        the best possible upload performance, group add and delete
        operations in batches that are close to the 5 MB limit.
        Submitting a large volume of single-document batches can
        overload a domain's document service.

        The endpoint for submitting `UploadDocuments` requests is
        domain-specific. To get the document endpoint for your domain,
        use the Amazon CloudSearch configuration service
        `DescribeDomains` action. A domain's endpoints are also
        displayed on the domain dashboard in the Amazon CloudSearch
        console.

        For more information about formatting your data for Amazon
        CloudSearch, see `Preparing Your Data`_ in the Amazon
        CloudSearch Developer Guide. For more information about
        uploading data for indexing, see `Uploading Data`_ in the
        Amazon CloudSearch Developer Guide.

        The batch is sent unchanged as the body of a POST to
        ``/2013-01-01/documents/batch``.

        :type documents: blob
        :param documents: A batch of documents formatted in JSON or XML.

        :type content_type: string
        :param content_type: The format of the batch you are uploading,
            sent as the `Content-Type` header (omitted when None). Amazon
            CloudSearch supports two document batch formats:

            + application/json
            + application/xml

        :return: The JSON-decoded response body, as returned by
            `make_request`.
        """
        uri = '/2013-01-01/documents/batch'
        # The Content-Type header is only set when the caller names a format.
        headers = {} if content_type is None else {'Content-Type': content_type}
        return self.make_request('POST', uri, expected_status=200,
                                 data=documents, headers=headers,
                                 params={})

    def make_request(self, verb, resource, headers=None, data='',
                     expected_status=None, params=None):
        """
        Send a request through ``AWSAuthConnection.make_request`` and
        decode the response body as UTF-8 JSON.

        :type verb: string
        :param verb: HTTP method, passed through to the parent class.

        :type resource: string
        :param resource: Request path, passed through to the parent class.

        :type headers: dict
        :param headers: Request headers. A new empty dict is used when None.

        :type data: string
        :param data: Request body, passed through to the parent class.

        :type expected_status: int
        :param expected_status: The response status that counts as success.

        :type params: dict
        :param params: Request parameters, passed through to the parent
            class.

        :return: The JSON-decoded response body when the response status
            equals ``expected_status``.

        :raises JSONResponseError: if the response status differs from
            ``expected_status``. When the error body is not decodable JSON,
            the exception body is ``{'message': <raw response text>}`` so
            that the HTTP status and reason are still reported.
        :raises ValueError: if a response with the expected status does not
            carry decodable JSON.
        """
        if headers is None:
            headers = {}
        response = AWSAuthConnection.make_request(
            self, verb, resource, headers=headers, data=data, params=params)
        raw_body = response.read()
        try:
            body = json.loads(raw_body.decode('utf-8'))
        except ValueError:
            # Covers JSON decode errors and UnicodeDecodeError alike (both
            # are ValueError subclasses).
            if response.status == expected_status:
                # A successful response must be valid JSON; keep the
                # original failure mode for this case.
                raise
            # An error response with an empty or non-JSON body must not hide
            # the HTTP status behind a decode error. Wrap the raw text so it
            # travels with the JSONResponseError raised below.
            body = {'message': raw_body.decode('utf-8', 'replace')}
        if response.status != expected_status:
            raise JSONResponseError(response.status, response.reason, body)
        return body