Files
llm-connect/llm_connect/embedding_cache.py
tegwick e499edba90 feat: initial llm-connect package scaffold
Copy markitect.llm module into standalone llm_connect package.
All markitect.* imports replaced with llm_connect.* equivalents.
LLMError base class inlined (no markitect.exceptions dependency).
Verified: from llm_connect import create_adapter works.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-27 07:54:42 +01:00

65 lines
2.0 KiB
Python

"""
File-based embedding cache.
Stores embedding vectors in a single JSON file keyed by entity slug.
Each entry includes a content digest so stale embeddings are
automatically invalidated when entity content changes.
"""
import json
from pathlib import Path
from typing import Optional
class EmbeddingCache:
    """Persistent cache for embedding vectors.

    Entries are kept in memory and only written to disk by :meth:`save`.
    Each entry stores the digest of the content it was computed from, so a
    lookup with a different digest is treated as a miss.

    Structure on disk (``embeddings.json``)::

        {
            "division-of-labour": {"digest": "abc123", "vector": [0.1, ...]},
            ...
        }
    """

    def __init__(self, cache_dir: Path):
        """Open the cache stored in ``cache_dir / "embeddings.json"``.

        Args:
            cache_dir: Directory holding the cache file. A plain string path
                is accepted as well. The directory does not need to exist
                yet; it is created on :meth:`save`.
        """
        # Path() is a no-op for Path inputs and lets callers pass a str.
        self._path = Path(cache_dir) / "embeddings.json"
        self._data: dict[str, dict] = {}
        self._hits = 0
        self._misses = 0
        self._load()

    def get(self, slug: str, content_digest: str) -> Optional[list[float]]:
        """Return the cached vector if *content_digest* matches, else ``None``.

        Every call is counted as either a hit or a miss (see :meth:`stats`).
        """
        entry = self._data.get(slug)
        if entry is not None and entry.get("digest") == content_digest:
            self._hits += 1
            return entry["vector"]
        self._misses += 1
        return None

    def put(self, slug: str, content_digest: str, vector: list[float]) -> None:
        """Store or overwrite the embedding for *slug* (in memory only)."""
        self._data[slug] = {"digest": content_digest, "vector": vector}

    def save(self) -> None:
        """Write cache to disk.

        The JSON is written to a sibling temporary file which then replaces
        the real file, so an interrupted write cannot leave a truncated
        ``embeddings.json`` behind (which the loader would discard entirely).

        Raises:
            OSError: If the directory or file cannot be written.
            TypeError: If a stored vector is not JSON-serializable.
        """
        # Serialize first so a serialization error touches nothing on disk.
        payload = json.dumps(self._data, separators=(",", ":"))
        self._path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = self._path.with_name(self._path.name + ".tmp")
        tmp_path.write_text(payload, encoding="utf-8")
        tmp_path.replace(self._path)

    def stats(self) -> dict:
        """Return cache statistics: entry count, hits and misses."""
        return {
            "entries": len(self._data),
            "hits": self._hits,
            "misses": self._misses,
        }

    def _load(self) -> None:
        """Read cache from disk if it exists.

        Loading is best-effort: an unreadable, undecodable or malformed file
        yields an empty cache instead of an exception. Individual entries
        that are not objects carrying a ``"vector"`` key are dropped so that
        :meth:`get` never fails on them.
        """
        if not self._path.is_file():
            return
        try:
            raw = json.loads(self._path.read_text(encoding="utf-8"))
        except (ValueError, OSError):
            # ValueError covers both json.JSONDecodeError and
            # UnicodeDecodeError (file is not valid UTF-8).
            self._data = {}
            return
        if not isinstance(raw, dict):
            # Valid JSON but not the expected top-level object.
            self._data = {}
            return
        self._data = {
            slug: entry
            for slug, entry in raw.items()
            if isinstance(entry, dict) and "vector" in entry
        }