Add optional semantic retrieval (hashing embedding provider, hybrid search scores)

This commit is contained in:
2026-04-26 16:05:27 +02:00
parent 7c3cd2ab63
commit 1bac1832f0
11 changed files with 453 additions and 3 deletions

View File

@@ -11,6 +11,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
from repo_registry.core.service import RegistryService
from repo_registry.llm_extraction import LLMCandidateExtractor, create_llm_connect_adapter
from repo_registry.repo_ingestion.git import GitIngestionService
from repo_registry.semantic import HashingEmbeddingProvider
from repo_registry.storage.sqlite import NotFoundError, RegistryStore
from repo_registry.web_api.schemas import (
AbilityCreate,
@@ -60,6 +61,7 @@ class Settings(BaseSettings):
checkout_root: str = Field(default="var/checkouts")
llm_provider: str | None = Field(default=None)
llm_model: str | None = Field(default=None)
embedding_provider: str | None = Field(default=None)
def get_settings() -> Settings:
@@ -78,10 +80,14 @@ def get_service(settings: Settings = Depends(get_settings)) -> RegistryService:
model=settings.llm_model,
)
llm_extractor = LLMCandidateExtractor(adapter)
embedding_provider = None
if settings.embedding_provider == "hashing":
embedding_provider = HashingEmbeddingProvider()
return RegistryService(
store,
ingestion=GitIngestionService(settings.checkout_root),
llm_extractor=llm_extractor,
embedding_provider=embedding_provider,
)

View File

@@ -654,6 +654,9 @@ class SearchResultResponse(BaseModel):
capability_name: str | None = None
evidence_level: str | None = None
source_reference: str | None = None
text_score: float = 0.0
vector_score: float = 0.0
hybrid_score: float = 0.0
model_config = {
"json_schema_extra": {
@@ -673,6 +676,9 @@ class SearchResultResponse(BaseModel):
"capability_name": "Classify Incoming Email",
"evidence_level": None,
"source_reference": None,
"text_score": 1.0,
"vector_score": 0.0,
"hybrid_score": 0.88,
}
]
}