generated from coulomb/repo-seed
Copy markitect.llm module into standalone llm_connect package. All markitect.* imports replaced with llm_connect.* equivalents. LLMError base class inlined (no markitect.exceptions dependency). Verified: from llm_connect import create_adapter works. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
65 lines
2.0 KiB
Python
65 lines
2.0 KiB
Python
"""
|
|
File-based embedding cache.
|
|
|
|
Stores embedding vectors in a single JSON file keyed by entity slug.
|
|
Each entry includes a content digest so stale embeddings are
|
|
automatically invalidated when entity content changes.
|
|
"""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
|
|
class EmbeddingCache:
    """Persistent cache for embedding vectors.

    Vectors are kept in memory in a dict keyed by entity slug and are
    written to ``<cache_dir>/embeddings.json`` only when :meth:`save` is
    called. Each entry carries the digest of the content it was computed
    from; :meth:`get` returns a vector only when the caller's digest
    matches the stored one.

    Structure on disk (``embeddings.json``)::

        {
            "division-of-labour": {"digest": "abc123", "vector": [0.1, ...]},
            ...
        }
    """

    def __init__(self, cache_dir: Path):
        """Open the cache stored in *cache_dir*, loading any existing file.

        Args:
            cache_dir: Directory that holds (or will hold)
                ``embeddings.json``. It does not need to exist yet; it is
                created on :meth:`save`.
        """
        # Path() is a no-op for Path inputs and lets plain strings work too.
        self._path = Path(cache_dir) / "embeddings.json"
        self._data: dict[str, dict] = {}
        self._hits = 0
        self._misses = 0
        self._load()

    def get(self, slug: str, content_digest: str) -> Optional[list[float]]:
        """Return the cached vector if *content_digest* matches, else ``None``.

        Every call is counted as either a hit or a miss (see :meth:`stats`).
        Entries that are malformed (not a mapping, or missing a vector) are
        treated as misses rather than raising.
        """
        entry = self._data.get(slug)
        # The isinstance guard protects against hand-edited or corrupted
        # cache files whose entries are not {"digest": ..., "vector": ...}.
        if isinstance(entry, dict) and entry.get("digest") == content_digest:
            vector = entry.get("vector")
            if vector is not None:
                self._hits += 1
                return vector
        self._misses += 1
        return None

    def put(self, slug: str, content_digest: str, vector: list[float]) -> None:
        """Store or overwrite the embedding for *slug* (in memory only)."""
        self._data[slug] = {"digest": content_digest, "vector": vector}

    def save(self) -> None:
        """Write the cache to disk, creating the cache directory if needed.

        The JSON is written to a sibling temporary file which then replaces
        the real file, so an interrupted write cannot leave a truncated
        ``embeddings.json`` behind.

        Raises:
            OSError: If the directory or file cannot be written.
            TypeError: If a stored value is not JSON-serialisable.
        """
        # Serialise first so a bad value fails before anything touches disk.
        payload = json.dumps(self._data, separators=(",", ":"))
        self._path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = self._path.with_name(self._path.name + ".tmp")
        try:
            tmp_path.write_text(payload, encoding="utf-8")
            tmp_path.replace(self._path)
        except OSError:
            # Do not leave a stray temp file behind; cleanup is best-effort
            # so that the original error is the one that propagates.
            try:
                tmp_path.unlink()
            except OSError:
                pass
            raise

    def stats(self) -> dict:
        """Return cache statistics: entry count plus hit/miss counters."""
        return {
            "entries": len(self._data),
            "hits": self._hits,
            "misses": self._misses,
        }

    def _load(self) -> None:
        """Read the cache from disk if it exists.

        An unreadable, undecodable, or structurally invalid file is treated
        as an empty cache rather than an error.
        """
        if not self._path.is_file():
            return
        try:
            loaded = json.loads(self._path.read_text(encoding="utf-8"))
        except (ValueError, OSError):
            # ValueError covers both json.JSONDecodeError and
            # UnicodeDecodeError (invalid bytes in the file).
            loaded = None
        # Valid JSON that is not an object (e.g. a list) is unusable here.
        self._data = loaded if isinstance(loaded, dict) else {}
|