1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
|
# EXAMPLE: query_combined
# HIDE_START
import json
import numpy as np
import redis
import warnings
from redis.commands.json.path import Path
from redis.commands.search.field import NumericField, TagField, TextField, VectorField
from redis.commands.search.index_definition import IndexDefinition, IndexType
from redis.commands.search.query import Query
from sentence_transformers import SentenceTransformer
def embed_text(model, text):
return np.array(model.encode(text)).astype(np.float32).tobytes()
warnings.filterwarnings("ignore", category=FutureWarning, message=r".*clean_up_tokenization_spaces.*")
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
query = "Bike for small kids"
query_vector = embed_text(model, query)
r = redis.Redis(decode_responses=True)
# create index
schema = (
TextField("$.description", no_stem=True, as_name="model"),
TagField("$.condition", as_name="condition"),
NumericField("$.price", as_name="price"),
VectorField(
"$.description_embeddings",
"FLAT",
{
"TYPE": "FLOAT32",
"DIM": 384,
"DISTANCE_METRIC": "COSINE",
},
as_name="vector",
),
)
index = r.ft("idx:bicycle")
index.create_index(
schema,
definition=IndexDefinition(prefix=["bicycle:"], index_type=IndexType.JSON),
)
# load data
with open("data/query_vector.json") as f:
bicycles = json.load(f)
pipeline = r.pipeline(transaction=False)
for bid, bicycle in enumerate(bicycles):
pipeline.json().set(f'bicycle:{bid}', Path.root_path(), bicycle)
pipeline.execute()
# HIDE_END
# STEP_START combined1
q = Query("@price:[500 1000] @condition:{new}")
res = index.search(q)
print(res.total) # >>> 1
# REMOVE_START
assert res.total == 1
# REMOVE_END
# STEP_END
# STEP_START combined2
q = Query("kids @price:[500 1000] @condition:{used}")
res = index.search(q)
print(res.total) # >>> 1
# REMOVE_START
assert res.total == 1
# REMOVE_END
# STEP_END
# STEP_START combined3
q = Query("(kids | small) @condition:{used}")
res = index.search(q)
print(res.total) # >>> 2
# REMOVE_START
assert res.total == 2
# REMOVE_END
# STEP_END
# STEP_START combined4
q = Query("@description:(kids | small) @condition:{used}")
res = index.search(q)
print(res.total) # >>> 0
# REMOVE_START
assert res.total == 0
# REMOVE_END
# STEP_END
# STEP_START combined5
q = Query("@description:(kids | small) @condition:{new | used}")
res = index.search(q)
print(res.total) # >>> 0
# REMOVE_START
assert res.total == 0
# REMOVE_END
# STEP_END
# STEP_START combined6
q = Query("@price:[500 1000] -@condition:{new}")
res = index.search(q)
print(res.total) # >>> 2
# REMOVE_START
assert res.total == 2
# REMOVE_END
# STEP_END
# STEP_START combined7
q = Query("(@price:[500 1000] -@condition:{new})=>[KNN 3 @vector $query_vector]").dialect(2)
# put query string here
res = index.search(q,{ 'query_vector': query_vector })
print(res.total) # >>> 2
# REMOVE_START
assert res.total == 2
# REMOVE_END
# STEP_END
# REMOVE_START
# destroy index and data
r.ft("idx:bicycle").dropindex(delete_documents=True)
# REMOVE_END
|