src/pqc_rag_signing/adapters/memory.py
1.7 KB · 54 lines · python Raw
1 """In-memory vector store - reference implementation + useful for tests."""
2
3 from __future__ import annotations
4
5 import math
6 from typing import Iterable
7
8 from pqc_rag_signing.adapters.base import VectorStoreAdapter
9 from pqc_rag_signing.chunk import SignedChunk
10
11
def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Return the cosine similarity of vectors *a* and *b*.

    The result is ``0.0`` when either vector is empty or when the two
    vectors differ in length. A vector whose Euclidean norm is zero has
    that norm replaced by ``1.0`` so the final division never divides by
    zero.
    """
    comparable = bool(a) and bool(b) and len(a) == len(b)
    if not comparable:
        return 0.0

    inner_product = sum(left * right for left, right in zip(a, b))

    norm_a = math.sqrt(sum(component * component for component in a))
    if norm_a == 0.0:
        norm_a = 1.0

    norm_b = math.sqrt(sum(component * component for component in b))
    if norm_b == 0.0:
        norm_b = 1.0

    return inner_product / (norm_a * norm_b)
19
20
class InMemoryAdapter(VectorStoreAdapter):
    """Simple in-memory vector store.

    Not for production - use it in tests and as a template for real DB
    adapters (Chroma, Pinecone, Qdrant, etc.).

    Records are held in a plain list of ``(chunk, embedding)`` pairs in
    insertion order, and every query scores all of them.
    """

    def __init__(self) -> None:
        # Each record pairs a chunk with this adapter's own copy of its embedding.
        self._records: list[tuple[SignedChunk, list[float]]] = []

    def upsert(
        self,
        chunks: Iterable[SignedChunk],
        embeddings: list[list[float]],
    ) -> None:
        """Store each chunk together with the embedding at the same position.

        Records are only ever appended; this method does not look for or
        replace an entry that was stored earlier.

        Args:
            chunks: Chunks to store; consumed exactly once.
            embeddings: One embedding per chunk, in the same order.

        Raises:
            ValueError: If the number of chunks and embeddings differ. The
                check runs before anything is stored.
        """
        # Materialise first: *chunks* may be a one-shot iterable without len().
        chunk_list = list(chunks)
        if len(chunk_list) != len(embeddings):
            raise ValueError(
                f"chunk count {len(chunk_list)} != embedding count {len(embeddings)}"
            )
        for chunk, emb in zip(chunk_list, embeddings):
            # Copy the embedding so later mutation of the caller's list
            # cannot change what is stored here.
            self._records.append((chunk, list(emb)))

    def query(self, embedding: list[float], top_k: int = 5) -> list[SignedChunk]:
        """Return up to *top_k* stored chunks, most similar first.

        Every stored embedding is scored against *embedding* with
        ``cosine_similarity``. Chunks with equal scores keep their
        insertion order because the sort is stable.

        Args:
            embedding: The query vector.
            top_k: Maximum number of chunks to return. Zero or a negative
                value yields an empty list.

        Returns:
            The best-scoring chunks in descending score order.
        """
        if top_k <= 0:
            # A negative slice bound counts from the end of the ranking, so
            # ``scored[:-1]`` would return nearly everything instead of nothing.
            return []
        scored = [
            (cosine_similarity(embedding, emb), chunk)
            for chunk, emb in self._records
        ]
        # Sort on the score only, so chunk objects are never compared.
        scored.sort(key=lambda t: t[0], reverse=True)
        return [c for _, c in scored[:top_k]]

    def count(self) -> int:
        """Return the number of stored records."""
        return len(self._records)

    def clear(self) -> None:
        """Remove every stored record."""
        self._records.clear()
54