feat(llm): add embedding adapter with cache and similarity utils (S1.3)
Add OpenAI-compatible embedding support (works with both OpenAI and OpenRouter), a file-based embedding cache with content-digest invalidation, and pure-Python cosine similarity utilities for downstream redundancy detection.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
50
markitect/llm/embedding_factory.py
Normal file
50
markitect/llm/embedding_factory.py
Normal file
@@ -0,0 +1,50 @@
|
||||
"""
|
||||
Factory for creating embedding adapters by provider name.
|
||||
"""
|
||||
|
||||
from typing import Optional, Any
|
||||
|
||||
from markitect.llm.embedding_adapter import EmbeddingAdapter
|
||||
from markitect.llm.exceptions import LLMConfigurationError
|
||||
|
||||
# Dotted import path of the adapter class shared by every provider below.
_OPENAI_COMPATIBLE_ADAPTER = (
    "markitect.llm.embedding_openai.OpenAICompatibleEmbeddingAdapter"
)

# Registry: provider name -> fully qualified adapter class path. The path is
# kept as a string so the adapter module is only imported on demand.
_EMBEDDING_PROVIDERS = {
    "openai": _OPENAI_COMPATIBLE_ADAPTER,
    "openrouter": _OPENAI_COMPATIBLE_ADAPTER,
}
|
||||
|
||||
|
||||
def create_embedding_adapter(
    provider: str = "openai",
    model: Optional[str] = None,
    api_key: Optional[str] = None,
    **kwargs: Any,
) -> EmbeddingAdapter:
    """Build the :class:`EmbeddingAdapter` registered under *provider*.

    The adapter class is resolved from its dotted path in
    ``_EMBEDDING_PROVIDERS``; its module is imported only when this
    function is called.

    Args:
        provider: Registry key, ``"openai"`` or ``"openrouter"``.
        model: Embedding model name (e.g. ``"text-embedding-3-small"``).
        api_key: Explicit API key.
        **kwargs: Extra keyword arguments forwarded to the adapter.

    Returns:
        The object produced by calling the adapter class with ``model``,
        ``api_key``, ``provider`` and ``**kwargs``.

    Raises:
        LLMConfigurationError: If *provider* is not a registered key.
    """
    dotted_path = _EMBEDDING_PROVIDERS.get(provider)
    if dotted_path is None:
        # Unknown provider: list the valid choices in the error message.
        known = ", ".join(sorted(_EMBEDDING_PROVIDERS))
        raise LLMConfigurationError(
            f"Unknown embedding provider {provider!r}. Choose from: {known}",
            context={"provider": provider},
        )

    # Deferred import: the adapter module is loaded only once a valid
    # provider has been requested.
    from importlib import import_module

    module_name, _, attr_name = dotted_path.rpartition(".")
    adapter_cls = getattr(import_module(module_name), attr_name)

    return adapter_cls(
        model=model,
        api_key=api_key,
        provider=provider,
        **kwargs,
    )
|
||||
Reference in New Issue
Block a user