""" File-based embedding cache. Stores embedding vectors in a single JSON file keyed by entity slug. Each entry includes a content digest so stale embeddings are automatically invalidated when entity content changes. """ import json from pathlib import Path from typing import Optional class EmbeddingCache: """Persistent cache for embedding vectors. Structure on disk (``embeddings.json``):: { "division-of-labour": {"digest": "abc123", "vector": [0.1, ...]}, ... } """ def __init__(self, cache_dir: Path): self._path = cache_dir / "embeddings.json" self._data: dict[str, dict] = {} self._hits = 0 self._misses = 0 self._load() def get(self, slug: str, content_digest: str) -> Optional[list[float]]: """Return the cached vector if *content_digest* matches, else ``None``.""" entry = self._data.get(slug) if entry is not None and entry.get("digest") == content_digest: self._hits += 1 return entry["vector"] self._misses += 1 return None def put(self, slug: str, content_digest: str, vector: list[float]) -> None: """Store or overwrite the embedding for *slug*.""" self._data[slug] = {"digest": content_digest, "vector": vector} def save(self) -> None: """Write cache to disk.""" self._path.parent.mkdir(parents=True, exist_ok=True) self._path.write_text(json.dumps(self._data, separators=(",", ":"))) def stats(self) -> dict: """Return cache statistics.""" return { "entries": len(self._data), "hits": self._hits, "misses": self._misses, } def _load(self) -> None: """Read cache from disk if it exists.""" if self._path.is_file(): try: self._data = json.loads(self._path.read_text()) except (json.JSONDecodeError, OSError): self._data = {}