File: query.py

package info (click to toggle)
python-lunr 0.8.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,644 kB
  • sloc: python: 3,811; javascript: 114; makefile: 60
file content (145 lines) | stat: -rw-r--r-- 4,907 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
from enum import Enum


class QueryPresence(Enum):
    """Enumerates how a clause's term must relate to a matching document."""

    # The term may or may not appear; this is the default presence.
    OPTIONAL = 1
    # The term has to appear in the document.
    REQUIRED = 2
    # Documents containing the term are excluded from the results.
    PROHIBITED = 3


class Query:
    """A `lunr.Query` provides a programmatic way of defining queries to be
    performed against a `lunr.Index`.

    Prefer constructing a `lunr.Query` using the `lunr.Index.query` method
    so the query object is pre-initialized with the right index fields.

    Attributes:
        clauses (list): Clauses added so far, in insertion order.
        all_fields (iterable): Fields assigned to any clause that does not
            specify its own.
    """

    # Constants for indicating what kind of automatic wildcard insertion will
    # be used when constructing a query clause.
    # This allows wildcards to be added to the beginning and end of a term
    # without having to manually do any string concatenation.
    # The wildcard constants can be bitwise combined to select both leading and
    # trailing wildcards.
    WILDCARD = "*"
    WILDCARD_NONE = 0
    WILDCARD_LEADING = 1
    WILDCARD_TRAILING = 2

    def __init__(self, all_fields):
        """Creates a query with no clauses.

        Args:
            all_fields (iterable): Field names used for clauses that do not
                specify any fields themselves.
        """
        self.clauses = []
        self.all_fields = all_fields

    def __repr__(self):
        # str() keeps the repr usable for debugging even when a clause was
        # created without a term (term=None) instead of raising TypeError.
        return '<Query fields="{}" clauses="{}">'.format(
            ",".join(self.all_fields), ",".join(str(c.term) for c in self.clauses)
        )

    def clause(self, *args, **kwargs):
        """Adds a `lunr.Clause` to this query.

        Unless the clause contains the fields to be matched all fields will be
        matched. In addition a default boost of 1 is applied to the clause.

        If the first argument is a `lunr.Clause` it will be mutated and added
        (any remaining args and kwargs are ignored), otherwise args and kwargs
        will be used in the constructor.

        When the clause requests a leading and/or trailing wildcard, the
        wildcard character is added to the term unless it is already there.
        An empty term with a wildcard requested becomes a single wildcard.

        Returns:
            lunr.Query: The Query itself.
        """
        if args and isinstance(args[0], Clause):
            clause = args[0]
        else:
            clause = Clause(*args, **kwargs)

        if not clause.fields:
            # Note: the clause shares this query's field collection, no copy
            # is made.
            clause.fields = self.all_fields

        # startswith/endswith are used instead of indexing term[0]/term[-1]
        # so that an empty term does not raise IndexError.
        if (
            clause.wildcard & Query.WILDCARD_LEADING
        ) and not clause.term.startswith(Query.WILDCARD):
            clause.term = Query.WILDCARD + clause.term

        if (
            clause.wildcard & Query.WILDCARD_TRAILING
        ) and not clause.term.endswith(Query.WILDCARD):
            clause.term = clause.term + Query.WILDCARD

        self.clauses.append(clause)
        return self

    def term(self, term, **kwargs):
        """Adds a term to the current query, creating a Clause and adds it to
        the list of clauses making up this Query.

        The term is not tokenized and used "as is". Any conversion to token
        or token-like strings should be performed before calling this method.

        For example:
            query.term(lunr.Tokenizer("foo bar"))

        Args:
            term (Token, list or tuple): A single term, which is converted
                with `str()`, or a list/tuple of terms. Lists and tuples are
                processed recursively, one clause per contained term; other
                iterables are treated as a single term.
            kwargs (dict): Additional properties to add to the Clause, the
                same ones are applied to every term.

        Returns:
            lunr.Query: The Query itself.
        """
        if isinstance(term, (list, tuple)):
            for t in term:
                self.term(t, **kwargs)
        else:
            self.clause(str(term), **kwargs)

        return self

    def is_negated(self):
        """A negated query is one in which every clause has a presence of
        prohibited. These queries require some special processing to return
        the expected results.

        A query without any clauses is also reported as negated, since
        `all()` of an empty sequence is True.

        Returns:
            bool: True if no clause has a presence other than prohibited.
        """
        return all(
            clause.presence == QueryPresence.PROHIBITED for clause in self.clauses
        )


class Clause:
    """One term of a `lunr.Query` together with the options that control how
    the term is matched against a `lunr.Index`.

    Args:
        term (str, optional): The term for the clause, defaults to None.
        fields (iterable, optional): The fields the term is searched
            against. A falsy value is stored as an empty list.
        edit_distance (int, optional): The character distance to use,
            defaults to 0.
        use_pipeline (bool, optional): Whether the index's pipeline should
            pre-process the clause, defaults to True.
        boost (int, optional): Boost to apply to the clause, defaults to 1.
        wildcard (Query.WILDCARD_*, optional): Any of the Query.WILDCARD
            constants, selecting if and where a wildcard is used, defaults
            to Query.WILDCARD_NONE.
        presence (QueryPresence, optional): Behaviour for the term's
            presence in a document, defaults to QueryPresence.OPTIONAL.
    """

    def __init__(
        self,
        term=None,
        fields=None,
        edit_distance=0,
        use_pipeline=True,
        boost=1,
        wildcard=Query.WILDCARD_NONE,
        presence=QueryPresence.OPTIONAL,
    ):
        super().__init__()
        # What to search for and where; a missing/empty `fields` is
        # normalised to a new empty list.
        self.term, self.fields = term, (fields if fields else [])
        # How the term is matched and scored.
        self.edit_distance, self.use_pipeline = edit_distance, use_pipeline
        self.boost, self.wildcard, self.presence = boost, wildcard, presence

    def __repr__(self):
        template = '<Clause term="{}">'
        return template.format(self.term)