File: node_update.py

package info (click to toggle)
redis 5%3A8.0.2-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 22,304 kB
  • sloc: ansic: 216,903; tcl: 51,562; sh: 4,625; perl: 4,214; cpp: 3,568; python: 2,954; makefile: 2,055; ruby: 639; javascript: 30; csh: 7
file content (85 lines) | stat: -rw-r--r-- 3,730 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from test import TestCase, generate_random_vector
import struct
import math
import random

class VectorUpdateAndClusters(TestCase):
    """Check that overwriting an element with VADD relocates it between clusters.

    Two groups of noisy unit vectors are inserted around a base direction and
    its negation. One element of the first group is then re-added with a
    vector drawn around the second direction, and VEMB / VSIM are used to
    confirm that the stored embedding and the search results reflect it.
    """

    def getname(self):
        """Return the human-readable name of this test."""
        return "VADD vector update with cluster relocation"

    def estimated_runtime(self):
        """Return the expected duration of the test, in seconds (about two)."""
        return 2.0

    def generate_cluster_vector(self, base_vec, noise=0.1):
        """Return a unit-length vector close to base_vec.

        Every component of base_vec receives an independent
        random.gauss(0, noise) offset, then the whole vector is scaled to
        unit Euclidean length.
        """
        perturbed = []
        for component in base_vec:
            perturbed.append(component + random.gauss(0, noise))
        length = math.sqrt(sum(c * c for c in perturbed))
        return [c / length for c in perturbed]

    def test(self):
        """Populate two clusters, move one element across, and verify the move."""
        dim = 128
        vectors_per_cluster = 5000
        set_key = self.test_key
        pack_format = f'{dim}f'  # dim C floats, as passed with the 'FP32' option

        # The second cluster is centred on the exact opposite direction of
        # the first one, so the two groups are as far apart as possible.
        cluster1_base = generate_random_vector(dim)
        cluster2_base = [-x for x in cluster1_base]

        # Fill cluster1 completely, then cluster2; elements are named
        # "<key>:<cluster label>:<index>".
        for label, base in (('cluster1', cluster1_base),
                            ('cluster2', cluster2_base)):
            for idx in range(vectors_per_cluster):
                member_vec = self.generate_cluster_vector(base)
                blob = struct.pack(pack_format, *member_vec)
                self.redis.execute_command(
                    'VADD', set_key, 'FP32', blob,
                    f'{set_key}:{label}:{idx}')

        # The element that will later be moved from cluster1 to cluster2.
        moved_element = f'{set_key}:cluster1:0'

        # Sanity check: a fresh cluster1-like query must return mostly
        # cluster1 members.
        probe = self.generate_cluster_vector(cluster1_base)
        reply = self.redis.execute_command(
            'VSIM', set_key, 'VALUES', dim,
            *map(str, probe),
            'COUNT', 100, 'WITHSCORES')

        # With WITHSCORES the reply alternates element name and score, so
        # the names sit at the even positions.
        cluster1_count = 0
        for name in reply[0::2]:
            if b'cluster1' in name:
                cluster1_count += 1
        assert cluster1_count > 80, "Initial clustering check failed"

        # Overwrite the chosen element with a (less noisy) cluster2 vector.
        new_vec = self.generate_cluster_vector(cluster2_base, noise=0.05)
        self.redis.execute_command(
            'VADD', set_key, 'FP32',
            struct.pack(pack_format, *new_vec),
            moved_element)

        # Read the stored embedding back and compare it with what was sent
        # using cosine similarity.
        stored = [float(v) for v in
                  self.redis.execute_command('VEMB', set_key, moved_element)]
        dot = sum(a * b for a, b in zip(stored, new_vec))
        stored_norm = math.sqrt(sum(v * v for v in stored))
        new_norm = math.sqrt(sum(v * v for v in new_vec))
        similarity = dot / (stored_norm * new_norm)
        assert similarity > 0.9, "Vector was not properly updated"

        # Query around the cluster2 centre: the moved element must show up.
        reply = self.redis.execute_command(
            'VSIM', set_key, 'VALUES', dim,
            *map(str, cluster2_base),
            'COUNT', 100, 'WITHSCORES')

        # Locate the first name entry equal to the moved element, if any.
        match_pos = next(
            (pos for pos in range(0, len(reply), 2)
             if reply[pos].decode() == moved_element),
            None)
        found = match_pos is not None
        if found:
            # The score follows its element name in the reply.
            similarity = float(reply[match_pos + 1])
            assert similarity > 0.80, f"Updated vector has low similarity: {similarity}"

        assert found, "Updated vector not found in cluster2 proximity"