File: models.py

package info (click to toggle)
python-moto 5.1.18-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 116,520 kB
  • sloc: python: 636,725; javascript: 181; makefile: 39; sh: 3
file content (277 lines) | stat: -rw-r--r-- 8,967 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
"""S3VectorsBackend class with methods for supported APIs."""

from collections.abc import Iterable
from typing import Any, Literal, Optional, TypedDict

from moto.core.base_backend import BackendDict, BaseBackend
from moto.core.common_models import BaseModel
from moto.utilities.arns import parse_arn
from moto.utilities.utils import PARTITION_NAMES

from .exceptions import (
    IndexNotFound,
    VectorBucketAlreadyExists,
    VectorBucketNotEmpty,
    VectorBucketNotFound,
    VectorBucketPolicyNotFound,
    VectorWrongDimension,
)
from .utils import create_vector_bucket_arn


class VectorData(TypedDict):
    """Raw payload of a vector, keyed by the name of its data type."""

    # The vector's components, one float per dimension.
    float32: list[float]


class VectorType(TypedDict, total=False):
    """Dictionary form of a vector; ``total=False`` makes every key optional."""

    # Name under which the vector is stored.
    key: str
    # The vector's raw values.
    data: VectorData
    # Free-form metadata attached to the vector.
    metadata: Any


class Vector(BaseModel):
    """A single stored vector: its key, its raw data and its metadata."""

    def __init__(self, key: str, data: VectorData, metadata: Any):
        self.key = key
        self.data = data
        self.metadata = metadata

    def to_dict(self, return_data: bool, return_metadata: bool) -> VectorType:
        """Return the dictionary form of this vector.

        The ``key`` entry is always present. ``data`` and ``metadata`` are
        only included when the corresponding flag is truthy.
        """
        serialised: VectorType = {"key": self.key}
        if return_data:
            serialised["data"] = self.data
        if return_metadata:
            serialised["metadata"] = self.metadata
        return serialised

    @staticmethod
    def from_dict(dct: VectorType) -> "Vector":
        """Build a ``Vector`` from its dictionary form.

        ``key`` and ``data`` must be present; a missing ``metadata`` entry
        defaults to an empty dict.
        """
        key = dct["key"]
        data = dct["data"]
        metadata = dct.get("metadata", {})
        return Vector(key=key, data=data, metadata=metadata)


class Index(BaseModel):
    """A vector index that lives inside a vector bucket."""

    def __init__(
        self,
        bucket: "VectorBucket",
        name: str,
        dimension: int,
        data_type: Literal["float32"],
        distance_metric: str,
    ):
        owner_name = bucket.vector_bucket_name
        owner_arn = bucket.vector_bucket_arn

        self.vectorBucketName = owner_name
        self.index_name = name
        # The index ARN is the owning bucket's ARN with "/index/<name>" appended.
        self.index_arn = f"{owner_arn}/index/{name}"
        self.dimension = dimension
        self.data_type = data_type
        self.distance_metric = distance_metric

        # Back-reference to the owning bucket.
        self._bucket = bucket
        # Vectors held by this index, looked up by vector key.
        self.vectors: dict[str, Vector] = {}


class VectorBucket(BaseModel):
    """A vector bucket: a named container for indexes plus an optional policy."""

    def __init__(
        self,
        arn: str,
        name: str,
        encryption_configuration: dict[str, str],
    ):
        self.vector_bucket_name = name
        self.vector_bucket_arn = arn

        # A missing or empty configuration falls back to the AES256 default.
        if not encryption_configuration:
            encryption_configuration = {"sseType": "AES256"}
        self.encryption_configuration = encryption_configuration

        self.indexes: dict[str, Index] = {}
        # No policy is attached until one is explicitly set.
        self.policy: Optional[str] = None


class S3VectorsBackend(BaseBackend):
    """Implementation of S3Vectors APIs."""

    def __init__(self, region_name: str, account_id: str):
        super().__init__(region_name, account_id)
        # All vector buckets known to this backend, keyed by bucket ARN.
        self.vector_buckets: dict[str, VectorBucket] = {}

    def create_vector_bucket(
        self,
        region: str,
        vector_bucket_name: str,
        encryption_configuration: dict[str, str],
    ) -> None:
        """Create a vector bucket and register it under its ARN.

        The ARN is built from this backend's account id, ``region`` and
        ``vector_bucket_name``.

        Raises VectorBucketAlreadyExists if a bucket with that ARN is already
        registered.
        """
        vector_bucket_arn = create_vector_bucket_arn(
            self.account_id, region, name=vector_bucket_name
        )
        if vector_bucket_arn in self.vector_buckets:
            raise VectorBucketAlreadyExists
        vector_bucket = VectorBucket(
            arn=vector_bucket_arn,
            name=vector_bucket_name,
            encryption_configuration=encryption_configuration,
        )
        self.vector_buckets[vector_bucket.vector_bucket_arn] = vector_bucket

    def get_vector_bucket(
        self,
        vector_bucket_name: Optional[str] = None,
        vector_bucket_arn: Optional[str] = None,
    ) -> VectorBucket:
        """Look up a vector bucket by name and/or ARN.

        The name is tried first (first bucket whose name matches); if that
        yields nothing, the ARN is used as a direct key lookup.

        Raises VectorBucketNotFound if neither identifier resolves to a bucket.
        """
        if vector_bucket_name:
            for vector_bucket in self.vector_buckets.values():
                if vector_bucket.vector_bucket_name == vector_bucket_name:
                    return vector_bucket
        if vector_bucket_arn and (bucket := self.vector_buckets.get(vector_bucket_arn)):
            return bucket
        raise VectorBucketNotFound

    def delete_vector_bucket(self, vector_bucket_name: str) -> None:
        """Delete the vector bucket with the given name.

        A falsy ``vector_bucket_name`` makes this a no-op.

        Raises VectorBucketNotFound if no bucket has that name, and
        VectorBucketNotEmpty if the bucket still contains indexes.
        """
        if vector_bucket_name:
            bucket = self.get_vector_bucket(vector_bucket_name=vector_bucket_name)
            if bucket.indexes:
                raise VectorBucketNotEmpty
            self.vector_buckets.pop(bucket.vector_bucket_arn, None)

    def list_vector_buckets(self, prefix: Optional[str]) -> list[VectorBucket]:
        """Return all buckets, or only those whose name starts with ``prefix``."""
        return [
            bucket
            for bucket in self.vector_buckets.values()
            if not prefix or bucket.vector_bucket_name.startswith(prefix)
        ]

    def create_index(
        self,
        vector_bucket_name: str,
        vector_bucket_arn: str,
        index_name: str,
        data_type: Literal["float32"],
        dimension: int,
        distance_metric: str,
    ) -> None:
        """Create an index inside the bucket identified by name or ARN.

        Raises VectorBucketNotFound if the bucket cannot be resolved.
        """
        bucket = self.get_vector_bucket(
            vector_bucket_name=vector_bucket_name, vector_bucket_arn=vector_bucket_arn
        )
        index = Index(
            bucket=bucket,
            name=index_name,
            data_type=data_type,
            dimension=dimension,
            distance_metric=distance_metric,
        )
        # NOTE(review): an existing index with the same ARN is replaced
        # silently, together with its vectors - confirm this is intended.
        bucket.indexes[index.index_arn] = index

    def delete_index(
        self, vector_bucket_name: str, index_name: str, index_arn: str
    ) -> None:
        """Remove an index from its owning bucket.

        Raises IndexNotFound if the index cannot be resolved.
        """
        index = self.get_index(vector_bucket_name, index_name, index_arn)
        index._bucket.indexes.pop(index.index_arn)

    def get_index(
        self, vector_bucket_name: str, index_name: str, index_arn: str
    ) -> Index:
        """Resolve an index from its ARN, or from bucket name plus index name.

        When ``index_arn`` is given it takes precedence: the bucket name and
        index name are extracted from it and the other two arguments are
        ignored.

        Raises IndexNotFound if either the bucket or the index does not exist.
        """
        if index_arn:
            # Assumes the ARN's resource id is "<bucket>/index/<index>", matching
            # how Index builds its ARN; any other number of "/"-separated parts
            # makes this unpacking raise ValueError.
            vector_bucket_name, _, index_name = parse_arn(index_arn).resource_id.split(
                "/"
            )
        try:
            bucket = self.get_vector_bucket(
                vector_bucket_name=vector_bucket_name, vector_bucket_arn=None
            )
            for index in bucket.indexes.values():
                if index.index_name == index_name:
                    return index
        except VectorBucketNotFound:
            # A missing bucket is reported to the caller as a missing index.
            pass
        raise IndexNotFound

    def list_indexes(
        self, vector_bucket_name: str, vector_bucket_arn: str
    ) -> list[Index]:
        """Pagination is not yet implemented. The prefix-parameter is also not yet implemented."""
        bucket = self.get_vector_bucket(
            vector_bucket_name, vector_bucket_arn=vector_bucket_arn
        )
        return list(bucket.indexes.values())

    def delete_vector_bucket_policy(
        self, vector_bucket_name: str, vector_bucket_arn: str
    ) -> None:
        """Clear the bucket's policy; succeeds even if no policy was set.

        Raises VectorBucketNotFound if the bucket cannot be resolved.
        """
        bucket = self.get_vector_bucket(
            vector_bucket_name, vector_bucket_arn=vector_bucket_arn
        )
        bucket.policy = None

    def get_vector_bucket_policy(
        self, vector_bucket_name: str, vector_bucket_arn: str
    ) -> str:
        """Return the bucket's policy.

        Raises VectorBucketNotFound if the bucket cannot be resolved, and
        VectorBucketPolicyNotFound if the policy is unset or empty.
        """
        bucket = self.get_vector_bucket(
            vector_bucket_name, vector_bucket_arn=vector_bucket_arn
        )
        if not bucket.policy:
            raise VectorBucketPolicyNotFound
        return bucket.policy

    def put_vector_bucket_policy(
        self, vector_bucket_name: str, vector_bucket_arn: str, policy: str
    ) -> None:
        """Set (or overwrite) the bucket's policy.

        Raises VectorBucketNotFound if the bucket cannot be resolved.
        """
        bucket = self.get_vector_bucket(
            vector_bucket_name, vector_bucket_arn=vector_bucket_arn
        )
        bucket.policy = policy

    def put_vectors(
        self,
        vector_bucket_name: str,
        index_name: str,
        index_arn: str,
        vectors: list[VectorType],
    ) -> None:
        """Store ``vectors`` in an index, replacing any with the same key.

        Raises IndexNotFound if the index cannot be resolved, and
        VectorWrongDimension if any vector's length differs from the index
        dimension.
        """
        index = self.get_index(
            vector_bucket_name, index_name=index_name, index_arn=index_arn
        )

        # Validate the whole batch first so that a bad vector leaves the index
        # untouched instead of partially updated.
        for vector in vectors:
            provided = len(vector["data"][index.data_type])  # type: ignore[literal-required]
            if provided != index.dimension:
                raise VectorWrongDimension(
                    key=vector["key"], actual=index.dimension, provided=provided
                )

        for vector in vectors:
            index.vectors[vector["key"]] = Vector.from_dict(vector)

    def get_vectors(
        self, vector_bucket_name: str, index_name: str, index_arn: str, keys: list[str]
    ) -> list[Vector]:
        """Return the stored vectors whose key appears in ``keys``.

        Unknown keys are ignored. Results follow the index's storage order
        (not the order of ``keys``), and each match is returned once even if
        its key is requested more than once.

        Raises IndexNotFound if the index cannot be resolved.
        """
        index = self.get_index(
            vector_bucket_name, index_name=index_name, index_arn=index_arn
        )
        # Build the lookup set once so each membership test is O(1) instead of
        # re-scanning the ``keys`` list for every stored vector.
        wanted = set(keys)
        return [vector for name, vector in index.vectors.items() if name in wanted]

    def list_vectors(
        self, vector_bucket_name: str, index_name: str, index_arn: str
    ) -> Iterable[Vector]:
        """
        Pagination is not yet implemented
        Segmentation is not yet implemented
        """
        index = self.get_index(
            vector_bucket_name, index_name=index_name, index_arn=index_arn
        )
        # This is a live view over the index's dict, not a snapshot.
        return index.vectors.values()

    def delete_vectors(
        self, vector_bucket_name: str, index_name: str, index_arn: str, keys: list[str]
    ) -> None:
        """Delete the vectors with the given keys; unknown keys are ignored.

        Raises IndexNotFound if the index cannot be resolved.
        """
        index = self.get_index(
            vector_bucket_name, index_name=index_name, index_arn=index_arn
        )
        for key in keys:
            index.vectors.pop(key, None)


# Module-level registry of S3VectorsBackend instances for the "s3vectors"
# service. boto3's region list is not used; PARTITION_NAMES is supplied as the
# additional regions instead.
# NOTE(review): presumably this yields one backend per partition rather than
# per region - confirm against BackendDict.
s3vectors_backends = BackendDict(
    S3VectorsBackend,
    "s3vectors",
    use_boto3_regions=False,
    additional_regions=PARTITION_NAMES,
)