src/pqc_rag_signing/adapters/memory.py
| 1 | """In-memory vector store - reference implementation + useful for tests.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | import math |
| 6 | from typing import Iterable |
| 7 | |
| 8 | from pqc_rag_signing.adapters.base import VectorStoreAdapter |
| 9 | from pqc_rag_signing.chunk import SignedChunk |
| 10 | |
| 11 | |
def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Return the cosine similarity of vectors *a* and *b*.

    The result is ``0.0`` when either vector is empty or when the two
    vectors differ in length. A vector whose Euclidean norm is zero has its
    norm replaced by ``1.0`` so the division is always defined; the score
    for an all-zero vector is therefore ``0.0``.
    """
    if a and b and len(a) == len(b):
        inner = sum(u * v for u, v in zip(a, b))

        norm_a = math.sqrt(sum(u * u for u in a))
        if not norm_a:
            # Zero-length vector: fall back to 1.0 to avoid dividing by zero.
            norm_a = 1.0

        norm_b = math.sqrt(sum(v * v for v in b))
        if not norm_b:
            norm_b = 1.0

        return inner / (norm_a * norm_b)

    # Empty input or mismatched dimensions: treated as "no similarity".
    return 0.0
| 19 | |
| 20 | |
class InMemoryAdapter(VectorStoreAdapter):
    """Simple in-memory vector store.

    Not for production - use it in tests and as a template for real DB
    adapters (Chroma, Pinecone, Qdrant, etc.). Records live in a plain list
    in insertion order, and every query scores all of them.
    """

    def __init__(self) -> None:
        """Create an empty store."""
        # Each record pairs a chunk with this store's own copy of its embedding.
        self._records: list[tuple[SignedChunk, list[float]]] = []

    def upsert(
        self,
        chunks: Iterable[SignedChunk],
        embeddings: list[list[float]],
    ) -> None:
        """Store *chunks* together with their *embeddings*.

        ``chunks[i]`` is paired with ``embeddings[i]``. Records are always
        appended: nothing here looks for or replaces a record stored
        earlier, so storing the same chunk twice yields two records.

        Raises:
            ValueError: If the number of chunks and embeddings differ.
        """
        # Materialise first: `chunks` may be a one-shot iterable without len().
        chunk_list = list(chunks)
        if len(chunk_list) != len(embeddings):
            raise ValueError(
                f"chunk count {len(chunk_list)} != embedding count {len(embeddings)}"
            )
        # Copy each embedding so later mutation by the caller cannot alter
        # what is stored.
        self._records.extend(
            (chunk, list(emb)) for chunk, emb in zip(chunk_list, embeddings)
        )

    def query(self, embedding: list[float], top_k: int = 5) -> list[SignedChunk]:
        """Return up to *top_k* stored chunks most similar to *embedding*.

        Chunks are ranked by ``cosine_similarity`` against their stored
        embedding, best first; records with equal scores keep insertion
        order. A non-positive *top_k* yields an empty list.
        """
        # Without this guard a negative top_k would reach the slice below and
        # return all but the |top_k| lowest-ranked chunks.
        if top_k <= 0:
            return []
        scored = [
            (cosine_similarity(embedding, emb), chunk)
            for chunk, emb in self._records
        ]
        # Sort on the score alone so chunks themselves are never compared.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [chunk for _, chunk in scored[:top_k]]

    def count(self) -> int:
        """Return the number of stored records."""
        return len(self._records)

    def clear(self) -> None:
        """Remove every stored record."""
        self._records.clear()
| 54 | |