1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
|
from lunr.query_lexer import QueryLexer
from lunr.query import Clause, QueryPresence
from lunr.exceptions import QueryParseError
class QueryParser:
def __init__(self, string, query):
self.lexer = QueryLexer(string)
self.query = query
self.current_clause = Clause()
self.lexeme_idx = 0
def parse(self):
self.lexer.run()
self.lexemes = self.lexer.lexemes
state = self.__class__.parse_clause
while state:
state = state(self)
return self.query
def peek_lexeme(self):
try:
return self.lexemes[self.lexeme_idx]
except IndexError:
return None
def consume_lexeme(self):
lexeme = self.peek_lexeme()
self.lexeme_idx += 1
return lexeme
def next_clause(self):
self.query.clause(self.current_clause)
self.current_clause = Clause()
@classmethod
def parse_clause(cls, parser):
lexeme = parser.peek_lexeme()
if lexeme is None:
return
if lexeme["type"] == QueryLexer.FIELD:
return cls.parse_field
elif lexeme["type"] == QueryLexer.TERM:
return cls.parse_term
elif lexeme["type"] == QueryLexer.PRESENCE:
return cls.parse_presence
else:
raise QueryParseError(
"Expected either a field or a term, found {}{}".format(
lexeme["type"],
(
'with value "' + lexeme["string"] + '"'
if len(lexeme["string"])
else ""
),
)
)
@classmethod
def parse_field(cls, parser):
lexeme = parser.consume_lexeme()
if lexeme["string"] not in parser.query.all_fields:
raise QueryParseError(
'Unrecognized field "{}", possible fields {}'.format(
lexeme["string"], ", ".join(parser.query.all_fields)
)
)
parser.current_clause.fields = [lexeme["string"]]
next_lexeme = parser.peek_lexeme()
if next_lexeme is None:
raise QueryParseError("Expected term, found nothing")
if next_lexeme["type"] == QueryLexer.TERM:
return cls.parse_term
else:
raise QueryParseError("Expected term, found {}".format(next_lexeme["type"]))
@classmethod
def parse_term(cls, parser):
lexeme = parser.consume_lexeme()
parser.current_clause.term = lexeme["string"].lower()
if "*" in lexeme["string"]:
parser.current_clause.use_pipeline = False
return cls._peek_next_lexeme(parser)
@classmethod
def parse_presence(cls, parser):
lexeme = parser.consume_lexeme()
if lexeme is None:
return
if lexeme["string"] == "-":
parser.current_clause.presence = QueryPresence.PROHIBITED
elif lexeme["string"] == "+":
parser.current_clause.presence = QueryPresence.REQUIRED
else:
raise QueryParseError(
"Unrecognized parser operator: {}, expected `+` or `-`".format(
lexeme.str
)
)
next_lexeme = parser.peek_lexeme()
if next_lexeme is None:
raise QueryParseError("Expected either a field or a term, found nothing")
if next_lexeme["type"] == QueryLexer.FIELD:
return cls.parse_field
elif next_lexeme["type"] == QueryLexer.TERM:
return cls.parse_term
else:
raise QueryParseError(
"Expected either a field or a term, found {}".format(lexeme["type"])
)
@classmethod
def parse_edit_distance(cls, parser):
lexeme = parser.consume_lexeme()
try:
edit_distance = int(lexeme["string"])
except ValueError as e:
raise QueryParseError("Edit distance must be numeric") from e
parser.current_clause.edit_distance = edit_distance
return cls._peek_next_lexeme(parser)
@classmethod
def parse_boost(cls, parser):
lexeme = parser.consume_lexeme()
try:
boost = int(lexeme["string"])
except ValueError as e:
raise QueryParseError("Boost must be numeric") from e
parser.current_clause.boost = boost
return cls._peek_next_lexeme(parser)
@classmethod
def _peek_next_lexeme(cls, parser):
next_lexeme = parser.peek_lexeme()
if next_lexeme is None:
parser.next_clause()
return
if next_lexeme["type"] == QueryLexer.TERM:
parser.next_clause()
return cls.parse_term
elif next_lexeme["type"] == QueryLexer.FIELD:
parser.next_clause()
return cls.parse_field
elif next_lexeme["type"] == QueryLexer.EDIT_DISTANCE:
return cls.parse_edit_distance
elif next_lexeme["type"] == QueryLexer.BOOST:
return cls.parse_boost
elif next_lexeme["type"] == QueryLexer.PRESENCE:
parser.next_clause()
return cls.parse_presence
else:
raise QueryParseError(
"Unexpected lexeme type {}".format(next_lexeme["type"])
)
|