Files
markitect-main/markitect/llm/embedding_factory.py
tegwick 267368eb60 feat(llm): add embedding adapter with cache and similarity utils (S1.3)
Add OpenAI-compatible embedding support (works with both OpenAI and
OpenRouter), file-based embedding cache with content-digest invalidation,
and pure-Python cosine similarity utilities for downstream redundancy
detection.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 01:22:21 +01:00

51 lines
1.6 KiB
Python

"""
Factory for creating embedding adapters by provider name.
"""
from typing import Optional, Any
from markitect.llm.embedding_adapter import EmbeddingAdapter
from markitect.llm.exceptions import LLMConfigurationError
# Dotted "package.module.ClassName" path of the adapter shared by every
# OpenAI-compatible provider listed below.
_OPENAI_COMPATIBLE_ADAPTER = (
    "markitect.llm.embedding_openai.OpenAICompatibleEmbeddingAdapter"
)

# Registry: provider name -> dotted path of the adapter class to instantiate.
# Paths are stored as strings so the adapter module is only imported when an
# adapter is actually requested.
_EMBEDDING_PROVIDERS = {
    "openai": _OPENAI_COMPATIBLE_ADAPTER,
    "openrouter": _OPENAI_COMPATIBLE_ADAPTER,
}
def create_embedding_adapter(
    provider: str = "openai",
    model: Optional[str] = None,
    api_key: Optional[str] = None,
    **kwargs: Any,
) -> EmbeddingAdapter:
    """Build the embedding adapter registered under *provider*.

    The adapter class is looked up by name in ``_EMBEDDING_PROVIDERS`` and
    its module is imported at call time rather than when this factory
    module is loaded.

    Args:
        provider: Registry key selecting the adapter; must be one of the
            keys of ``_EMBEDDING_PROVIDERS`` (``"openai"`` or
            ``"openrouter"``). Also forwarded to the adapter constructor.
        model: Embedding model name (e.g. ``"text-embedding-3-small"``),
            forwarded unchanged to the adapter constructor.
        api_key: Explicit API key, forwarded unchanged to the adapter
            constructor.
        **kwargs: Extra keyword arguments forwarded to the adapter
            constructor.

    Returns:
        The instance produced by calling the resolved adapter class.

    Raises:
        LLMConfigurationError: If *provider* is not a registered name.
    """
    # Guard clause: reject unknown providers before importing anything.
    dotted_path = _EMBEDDING_PROVIDERS.get(provider)
    if dotted_path is None:
        known = ", ".join(sorted(_EMBEDDING_PROVIDERS))
        raise LLMConfigurationError(
            f"Unknown embedding provider {provider!r}. Choose from: {known}",
            context={"provider": provider},
        )

    # Lazy import: resolve "package.module.ClassName" only when requested.
    from importlib import import_module

    module_name, attr_name = dotted_path.rsplit(".", 1)
    adapter_cls = getattr(import_module(module_name), attr_name)
    return adapter_cls(
        model=model,
        api_key=api_key,
        provider=provider,
        **kwargs,
    )