generated from coulomb/repo-seed
stable asset queries, lexical search, filters, contextual entity and relationship retrieval, permission-aware fail-closed behavior, source-grounded snippets, feedback capture, and KPI hooks
This commit is contained in:
@@ -49,6 +49,8 @@ from .core import (
|
||||
PolicyEffect,
|
||||
RelationshipTargetKind,
|
||||
RepresentationKind,
|
||||
RetrievalFeedbackLabel,
|
||||
RetrievalFeedbackRecord,
|
||||
Sensitivity,
|
||||
SourceReference,
|
||||
SourcePayload,
|
||||
@@ -81,8 +83,23 @@ from .services import (
|
||||
AssetChangeResult,
|
||||
AssetIngestionResult,
|
||||
AssetIngestionService,
|
||||
AssetQueryItem,
|
||||
AssetQueryRequest,
|
||||
AssetQueryResult,
|
||||
AssetRegistryService,
|
||||
AssetRetrievalService,
|
||||
ContextEntityQueryItem,
|
||||
ContextEntityQueryRequest,
|
||||
ContextEntityQueryResult,
|
||||
LexicalIndexRefreshResult,
|
||||
RelationshipChangeResult,
|
||||
RelationshipQueryItem,
|
||||
RelationshipQueryRequest,
|
||||
RelationshipQueryResult,
|
||||
RetrievalFeedbackRequest,
|
||||
RetrievalFeedbackResult,
|
||||
RetrievalQualityMetrics,
|
||||
RetrievalSnippet,
|
||||
)
|
||||
from .storage import InMemoryKnowledgeRepository
|
||||
from .workflows import (
|
||||
@@ -108,8 +125,12 @@ __all__ = [
|
||||
"AssetChangeResult",
|
||||
"AssetIngestionResult",
|
||||
"AssetIngestionService",
|
||||
"AssetQueryItem",
|
||||
"AssetQueryRequest",
|
||||
"AssetQueryResult",
|
||||
"AssetRegistryRepository",
|
||||
"AssetRegistryService",
|
||||
"AssetRetrievalService",
|
||||
"AssetVersion",
|
||||
"AuditEvent",
|
||||
"AuditOutcome",
|
||||
@@ -121,6 +142,9 @@ __all__ = [
|
||||
"Collection",
|
||||
"ContextAssembler",
|
||||
"ContextEntity",
|
||||
"ContextEntityQueryItem",
|
||||
"ContextEntityQueryRequest",
|
||||
"ContextEntityQueryResult",
|
||||
"ContextEntityType",
|
||||
"ContextItem",
|
||||
"ContextPackage",
|
||||
@@ -146,6 +170,7 @@ __all__ = [
|
||||
"IdempotencyStatus",
|
||||
"KnowledgeAsset",
|
||||
"KontextualError",
|
||||
"LexicalIndexRefreshResult",
|
||||
"LifecycleState",
|
||||
"MetadataFieldDefinition",
|
||||
"MetadataRecord",
|
||||
@@ -167,9 +192,18 @@ __all__ = [
|
||||
"Relationship",
|
||||
"RelationshipChangeResult",
|
||||
"RelationshipGraph",
|
||||
"RelationshipQueryItem",
|
||||
"RelationshipQueryRequest",
|
||||
"RelationshipQueryResult",
|
||||
"RelationshipTargetKind",
|
||||
"RelationshipType",
|
||||
"RepresentationKind",
|
||||
"RetrievalFeedbackLabel",
|
||||
"RetrievalFeedbackRecord",
|
||||
"RetrievalFeedbackRequest",
|
||||
"RetrievalFeedbackResult",
|
||||
"RetrievalQualityMetrics",
|
||||
"RetrievalSnippet",
|
||||
"RunManifest",
|
||||
"RunStatus",
|
||||
"Sensitivity",
|
||||
|
||||
@@ -21,6 +21,7 @@ from kontextual_engine.core import (
|
||||
MetadataSchema,
|
||||
MetadataSchemaAssignment,
|
||||
RepresentationKind,
|
||||
RetrievalFeedbackRecord,
|
||||
Sensitivity,
|
||||
)
|
||||
from kontextual_engine.errors import NotFoundError, ValidationError
|
||||
@@ -38,6 +39,7 @@ class InMemoryAssetRegistryRepository:
|
||||
relationships: dict[str, CoreRelationship] = field(default_factory=dict)
|
||||
versions: dict[str, list[AssetVersion]] = field(default_factory=dict)
|
||||
audit_events: dict[str, AuditEvent] = field(default_factory=dict)
|
||||
retrieval_feedback: dict[str, RetrievalFeedbackRecord] = field(default_factory=dict)
|
||||
idempotency_records: dict[str, IdempotencyRecord] = field(default_factory=dict)
|
||||
ingestion_jobs: dict[str, IngestionJob] = field(default_factory=dict)
|
||||
|
||||
@@ -253,6 +255,24 @@ class InMemoryAssetRegistryRepository:
|
||||
events = [event for event in events if event.correlation_id == correlation_id]
|
||||
return sorted(events, key=lambda event: event.occurred_at)
|
||||
|
||||
def save_retrieval_feedback(self, record: RetrievalFeedbackRecord) -> RetrievalFeedbackRecord:
    """Store ``record`` in memory under its ``feedback_id`` and return it.

    An existing entry with the same ``feedback_id`` is overwritten.
    """
    # Called only for its effect; the result is discarded.
    # NOTE(review): presumably get_actor raises for an unknown actor -- confirm.
    self.get_actor(record.actor_id)
    feedback_by_id = self.retrieval_feedback
    feedback_by_id[record.feedback_id] = record
    return record
|
||||
|
||||
def list_retrieval_feedback(
    self,
    *,
    correlation_id: str | None = None,
    label: str | None = None,
) -> list[RetrievalFeedbackRecord]:
    """Return stored feedback records, optionally filtered, in a stable order.

    Args:
        correlation_id: when given, keep only records whose
            ``correlation_id`` equals it.
        label: when given, keep only records whose ``label.value`` equals it.

    Returns:
        A new list ordered by ``(created_at, feedback_id)``.
    """

    def _matches(candidate: RetrievalFeedbackRecord) -> bool:
        # The correlation filter is checked first, so the label of a record
        # rejected by it is never inspected.
        if correlation_id is not None and not (candidate.correlation_id == correlation_id):
            return False
        return label is None or candidate.label.value == label

    selected = [candidate for candidate in self.retrieval_feedback.values() if _matches(candidate)]
    selected.sort(key=lambda candidate: (candidate.created_at, candidate.feedback_id))
    return selected
|
||||
|
||||
def save_idempotency_record(self, record: IdempotencyRecord) -> IdempotencyRecord:
    """Store ``record`` in memory under ``record.key`` and return it.

    An existing entry with the same key is overwritten.
    """
    records_by_key = self.idempotency_records
    records_by_key[record.key] = record
    return record
|
||||
|
||||
@@ -24,6 +24,7 @@ from kontextual_engine.core import (
|
||||
MetadataSchemaAssignment,
|
||||
RepresentationKind,
|
||||
RelationshipTargetKind,
|
||||
RetrievalFeedbackRecord,
|
||||
Sensitivity,
|
||||
)
|
||||
from kontextual_engine.errors import NotFoundError, ValidationError
|
||||
@@ -466,6 +467,56 @@ class SQLiteAssetRegistryRepository:
|
||||
rows = self._all(f"select payload from audit_events{where} order by occurred_at, rowid", tuple(params))
|
||||
return [AuditEvent.from_dict(_loads(row["payload"])) for row in rows]
|
||||
|
||||
def save_retrieval_feedback(self, record: RetrievalFeedbackRecord) -> RetrievalFeedbackRecord:
    """Insert or overwrite one ``retrieval_feedback`` row and return ``record``.

    The row is keyed by ``record.feedback_id``; on an id conflict every other
    column is replaced with the new values (upsert).

    Raises:
        ValidationError: when ``_is_foreign_key_error`` classifies the
            integrity failure as a foreign-key violation.
        sqlite3.IntegrityError: any other integrity failure, re-raised as is.
    """
    upsert_sql = """
        insert into retrieval_feedback (id, label, actor_id, correlation_id, created_at, payload)
        values (?, ?, ?, ?, ?, ?)
        on conflict(id) do update set
            label=excluded.label,
            actor_id=excluded.actor_id,
            correlation_id=excluded.correlation_id,
            created_at=excluded.created_at,
            payload=excluded.payload
        """
    try:
        with self._connect() as conn:
            # Built inside the connection scope so serialization still happens
            # after the connection has been opened.
            row_values = (
                record.feedback_id,
                record.label.value,
                record.actor_id,
                record.correlation_id,
                record.created_at,
                _json(record.to_dict()),
            )
            conn.execute(upsert_sql, row_values)
    except sqlite3.IntegrityError as exc:
        if not _is_foreign_key_error(exc):
            raise
        raise ValidationError(
            "Retrieval feedback references an unknown actor",
            details={"actor_id": record.actor_id, "feedback_id": record.feedback_id},
        ) from exc
    return record
|
||||
|
||||
def list_retrieval_feedback(
    self,
    *,
    correlation_id: str | None = None,
    label: str | None = None,
) -> list[RetrievalFeedbackRecord]:
    """Load feedback records from SQLite, optionally filtered.

    Args:
        correlation_id: when given, adds a ``correlation_id = ?`` condition.
        label: when given, adds a ``label = ?`` condition.

    Returns:
        Records rebuilt from each row's JSON ``payload`` column, ordered by
        ``created_at`` then ``id``.
    """
    # Only filters that were actually supplied contribute a bound condition.
    active_filters = [
        (column, value)
        for column, value in (("correlation_id", correlation_id), ("label", label))
        if value is not None
    ]
    if active_filters:
        conditions = " and ".join(f"{column} = ?" for column, _ in active_filters)
        where = f" where {conditions}"
    else:
        where = ""
    bound_values = tuple(value for _, value in active_filters)
    rows = self._all(f"select payload from retrieval_feedback{where} order by created_at, id", bound_values)
    return [RetrievalFeedbackRecord.from_dict(_loads(row["payload"])) for row in rows]
|
||||
|
||||
def save_idempotency_record(self, record: IdempotencyRecord) -> IdempotencyRecord:
|
||||
with self._connect() as conn:
|
||||
conn.execute(
|
||||
@@ -620,6 +671,15 @@ class SQLiteAssetRegistryRepository:
|
||||
payload text not null,
|
||||
foreign key(actor_id) references actors(id)
|
||||
);
|
||||
create table if not exists retrieval_feedback (
|
||||
id text primary key,
|
||||
label text not null,
|
||||
actor_id text not null,
|
||||
correlation_id text not null,
|
||||
created_at text not null,
|
||||
payload text not null,
|
||||
foreign key(actor_id) references actors(id)
|
||||
);
|
||||
create table if not exists idempotency_records (
|
||||
key text primary key,
|
||||
operation text not null,
|
||||
@@ -647,6 +707,8 @@ class SQLiteAssetRegistryRepository:
|
||||
create index if not exists idx_versions_asset on asset_versions(asset_id);
|
||||
create index if not exists idx_audit_target on audit_events(target);
|
||||
create index if not exists idx_audit_correlation on audit_events(correlation_id);
|
||||
create index if not exists idx_retrieval_feedback_label on retrieval_feedback(label);
|
||||
create index if not exists idx_retrieval_feedback_correlation on retrieval_feedback(correlation_id);
|
||||
create index if not exists idx_ingestion_jobs_status on ingestion_jobs(status);
|
||||
create index if not exists idx_ingestion_jobs_correlation on ingestion_jobs(correlation_id);
|
||||
"""
|
||||
|
||||
@@ -40,6 +40,7 @@ from .relationships import (
|
||||
CoreRelationship,
|
||||
RelationshipTargetKind,
|
||||
)
|
||||
from .retrieval_feedback import RetrievalFeedbackLabel, RetrievalFeedbackRecord
|
||||
|
||||
__all__ = [
|
||||
"Actor",
|
||||
@@ -76,6 +77,8 @@ __all__ = [
|
||||
"PolicyEffect",
|
||||
"RelationshipTargetKind",
|
||||
"RepresentationKind",
|
||||
"RetrievalFeedbackLabel",
|
||||
"RetrievalFeedbackRecord",
|
||||
"Sensitivity",
|
||||
"SourceReference",
|
||||
"SourcePayload",
|
||||
|
||||
@@ -19,6 +19,8 @@ class ContextEntityType(str, Enum):
|
||||
PROCESS = "process"
|
||||
SOURCE_SYSTEM = "source_system"
|
||||
TOPIC = "topic"
|
||||
WORKFLOW_RUN = "workflow_run"
|
||||
GENERATED_ARTIFACT = "generated_artifact"
|
||||
BUSINESS_OBJECT = "business_object"
|
||||
|
||||
|
||||
|
||||
59
src/kontextual_engine/core/retrieval_feedback.py
Normal file
59
src/kontextual_engine/core/retrieval_feedback.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""Retrieval feedback and quality signal primitives."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
from .primitives import compact_dict, new_id, utc_now
|
||||
|
||||
|
||||
class RetrievalFeedbackLabel(str, Enum):
    """Closed set of labels a retrieval feedback record can carry.

    Members also subclass ``str``, so each one compares equal to its plain
    string value.
    """

    # Values are the serialized form written by RetrievalFeedbackRecord.to_dict.
    USEFUL = "useful"
    IRRELEVANT = "irrelevant"
    MISSING = "missing"
    UNSAFE = "unsafe"
    LOW_CONFIDENCE = "low_confidence"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RetrievalFeedbackRecord:
    """Immutable record of one piece of feedback about a retrieval result.

    ``feedback_id`` and ``created_at`` are generated when not supplied: the id
    via ``new_id("feedback")`` and the timestamp as ``utc_now().isoformat()``.
    """

    label: RetrievalFeedbackLabel
    query: dict[str, Any]
    result_ref: dict[str, Any]
    actor_id: str
    correlation_id: str
    notes: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
    feedback_id: str = field(default_factory=lambda: new_id("feedback"))
    created_at: str = field(default_factory=lambda: utc_now().isoformat())

    def to_dict(self) -> dict[str, Any]:
        """Serialize the record to a plain dict passed through ``compact_dict``.

        The label is written as its string value and the mapping fields are
        shallow-copied.
        """
        # NOTE(review): compact_dict presumably drops None/empty entries,
        # which would explain the .get() fallbacks in from_dict -- confirm.
        serialized = {
            "feedback_id": self.feedback_id,
            "label": self.label.value,
            "query": dict(self.query),
            "result_ref": dict(self.result_ref),
            "actor_id": self.actor_id,
            "correlation_id": self.correlation_id,
            "notes": self.notes,
            "metadata": dict(self.metadata),
            "created_at": self.created_at,
        }
        return compact_dict(serialized)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "RetrievalFeedbackRecord":
        """Rebuild a record from the mapping shape produced by ``to_dict``.

        ``feedback_id``, ``label``, ``actor_id``, ``correlation_id`` and
        ``created_at`` are required keys; ``query``, ``result_ref`` and
        ``metadata`` default to empty dicts and ``notes`` to ``None``.
        """
        # Keys are read in the same order as the original constructor call so
        # that the first missing key reported is unchanged.
        constructor_args = {
            "feedback_id": data["feedback_id"],
            "label": RetrievalFeedbackLabel(data["label"]),
            "query": dict(data.get("query", {})),
            "result_ref": dict(data.get("result_ref", {})),
            "actor_id": data["actor_id"],
            "correlation_id": data["correlation_id"],
            "notes": data.get("notes"),
            "metadata": dict(data.get("metadata", {})),
            "created_at": data["created_at"],
        }
        return cls(**constructor_args)
|
||||
@@ -20,6 +20,7 @@ from kontextual_engine.core import (
|
||||
MetadataSchema,
|
||||
MetadataSchemaAssignment,
|
||||
RepresentationKind,
|
||||
RetrievalFeedbackRecord,
|
||||
Sensitivity,
|
||||
)
|
||||
|
||||
@@ -90,6 +91,14 @@ class AssetRegistryRepository(Protocol):
|
||||
correlation_id: str | None = None,
|
||||
) -> list[AuditEvent]: ...
|
||||
|
||||
def save_retrieval_feedback(self, record: RetrievalFeedbackRecord) -> RetrievalFeedbackRecord:
    """Accept a feedback record and return a feedback record (interface stub, no behavior)."""
|
||||
def list_retrieval_feedback(
    self,
    *,
    correlation_id: str | None = None,
    label: str | None = None,
) -> list[RetrievalFeedbackRecord]:
    """Return feedback records, optionally narrowed by keyword-only filters (interface stub, no behavior)."""
|
||||
|
||||
def save_idempotency_record(self, record: IdempotencyRecord) -> IdempotencyRecord: ...
|
||||
def get_idempotency_record(self, key: str) -> IdempotencyRecord | None: ...
|
||||
|
||||
|
||||
@@ -6,11 +6,43 @@ from .asset_service import (
|
||||
RelationshipChangeResult,
|
||||
)
|
||||
from .ingestion_service import AssetIngestionResult, AssetIngestionService
|
||||
from .retrieval_service import (
|
||||
AssetQueryItem,
|
||||
AssetQueryRequest,
|
||||
AssetQueryResult,
|
||||
AssetRetrievalService,
|
||||
ContextEntityQueryItem,
|
||||
ContextEntityQueryRequest,
|
||||
ContextEntityQueryResult,
|
||||
LexicalIndexRefreshResult,
|
||||
RelationshipQueryItem,
|
||||
RelationshipQueryRequest,
|
||||
RelationshipQueryResult,
|
||||
RetrievalFeedbackRequest,
|
||||
RetrievalFeedbackResult,
|
||||
RetrievalQualityMetrics,
|
||||
RetrievalSnippet,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"AssetChangeResult",
|
||||
"AssetIngestionResult",
|
||||
"AssetIngestionService",
|
||||
"AssetQueryItem",
|
||||
"AssetQueryRequest",
|
||||
"AssetQueryResult",
|
||||
"AssetRegistryService",
|
||||
"AssetRetrievalService",
|
||||
"ContextEntityQueryItem",
|
||||
"ContextEntityQueryRequest",
|
||||
"ContextEntityQueryResult",
|
||||
"LexicalIndexRefreshResult",
|
||||
"RelationshipChangeResult",
|
||||
"RelationshipQueryItem",
|
||||
"RelationshipQueryRequest",
|
||||
"RelationshipQueryResult",
|
||||
"RetrievalFeedbackRequest",
|
||||
"RetrievalFeedbackResult",
|
||||
"RetrievalQualityMetrics",
|
||||
"RetrievalSnippet",
|
||||
]
|
||||
|
||||
@@ -298,6 +298,8 @@ class AssetIngestionService:
|
||||
metadata={
|
||||
"extractor": extractor.name,
|
||||
"normalized_hash": extraction.normalized.normalized_hash,
|
||||
"search_text": extraction.normalized.text,
|
||||
"search_text_length": len(extraction.normalized.text),
|
||||
"permission_context": dict(payload.permission_context),
|
||||
**extraction.metadata,
|
||||
},
|
||||
|
||||
1993
src/kontextual_engine/services/retrieval_service.py
Normal file
1993
src/kontextual_engine/services/retrieval_service.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user