# Generated from coulomb/repo-seed (Python, 723 lines, 27 KiB).
from pathlib import Path

from kontextual_engine import (
    Actor,
    ActorType,
    AssetIngestionService,
    AssetQueryRequest,
    AssetRegistryService,
    AssetRepresentation,
    AssetRetrievalService,
    Classification,
    ContextEntity,
    ContextEntityQueryRequest,
    ContextEntityType,
    InMemoryAssetRegistryRepository,
    LifecycleState,
    MetadataRecord,
    OperationContext,
    PolicyDecision,
    RelationshipQueryRequest,
    RepresentationKind,
    RetrievalFeedbackLabel,
    RetrievalFeedbackRequest,
    Sensitivity,
    SQLiteAssetRegistryRepository,
    SourceReference,
)


def test_asset_retrieval_returns_stable_paginated_envelope() -> None:
    """Page through "document" assets one at a time, sorted by title.

    Three assets are registered (one "note", two "document"). The two
    single-item pages must agree on total / next_offset, and the serialized
    second page must expose the source reference, representation, metadata
    record and correlation id that were supplied at creation time.
    """
    repo = InMemoryAssetRegistryRepository()
    registry = AssetRegistryService(repo)
    retrieval = AssetRetrievalService(repo)
    context = operation_context()
    source_ref = SourceReference(source_system="repo", path="docs/b.md", checksum="sha256:b")
    representation = AssetRepresentation.from_content(
        "asset-bravo",
        RepresentationKind.NORMALIZED,
        "text/plain",
        "bravo content",
        storage_ref="object://bravo-normalized",
        source_ref_id=source_ref.id,
    )

    # Created out of title order on purpose so the sort is actually exercised.
    registry.create_asset(
        "Charlie",
        Classification(asset_type="note", sensitivity=Sensitivity.INTERNAL),
        context,
        asset_id="asset-charlie",
    )
    registry.create_asset(
        "Bravo",
        Classification(asset_type="document", sensitivity=Sensitivity.PUBLIC, owner="Docs", topics=("retrieval",)),
        context,
        asset_id="asset-bravo",
        source_refs=[source_ref],
        representations=[representation],
        metadata_records=[MetadataRecord("status", "approved", confirmed=True)],
    )
    registry.create_asset(
        "Alpha",
        Classification(asset_type="document", sensitivity=Sensitivity.PUBLIC, owner="Docs"),
        context,
        asset_id="asset-alpha",
    )

    first_page = retrieval.query_assets(
        AssetQueryRequest(asset_type="document", sort_by="title", limit=1),
        context,
    )
    second_page = retrieval.query_assets(
        AssetQueryRequest(asset_type="document", sort_by="title", limit=1, offset=1),
        context,
    )

    assert first_page.success is True
    assert first_page.total == 2
    assert first_page.result_count == 1
    assert first_page.next_offset == 1
    assert first_page.items[0].asset.id == "asset-alpha"
    assert second_page.next_offset is None
    assert second_page.items[0].asset.id == "asset-bravo"

    # Serialize once and inspect the payload instead of re-serializing per assert.
    second_payload = second_page.to_dict()
    second_result = second_payload["results"][0]
    assert second_result["source_refs"][0]["path"] == "docs/b.md"
    assert second_result["representations"][0]["storage_ref"] == "object://bravo-normalized"
    assert second_result["metadata_records"][0]["key"] == "status"
    assert second_payload["correlation_id"] == "corr-retrieval"


def test_asset_retrieval_filters_source_metadata_lifecycle_and_representation_kind() -> None:
    """Combine classification, metadata, source and representation filters.

    An active "Guide" asset matches every filter; a second "Draft" asset has
    only unconfirmed metadata and is transitioned to RETIRED, so the query
    must return the guide alone.
    """
    repo = InMemoryAssetRegistryRepository()
    registry = AssetRegistryService(repo)
    retrieval = AssetRetrievalService(repo)
    context = operation_context()
    source_ref = SourceReference(source_system="repo", path="docs/guide.md", checksum="sha256:guide")
    registry.create_asset(
        "Guide",
        Classification(
            asset_type="guide",
            sensitivity=Sensitivity.INTERNAL,
            owner="Docs",
            topics=("retrieval",),
            review_state="approved",
        ),
        context,
        asset_id="asset-guide",
        source_refs=[source_ref],
        representations=[
            AssetRepresentation.from_content(
                "asset-guide",
                RepresentationKind.NORMALIZED,
                "text/plain",
                "guide normalized",
            )
        ],
        metadata_records=[MetadataRecord("channel", "internal", confirmed=True)],
    )
    draft = registry.create_asset(
        "Draft",
        Classification(asset_type="guide", sensitivity=Sensitivity.INTERNAL, owner="Docs"),
        context,
        asset_id="asset-draft",
        metadata_records=[MetadataRecord("channel", "internal", confirmed=False)],
    )
    registry.transition_lifecycle(draft.asset.id, LifecycleState.RETIRED, context)

    result = retrieval.query_assets(
        AssetQueryRequest(
            asset_type="guide",
            lifecycle=LifecycleState.ACTIVE,
            owner="Docs",
            topic="retrieval",
            review_state="approved",
            metadata_filters={"channel": "internal"},
            confirmed_metadata_only=True,
            source_system="repo",
            source_path="docs/guide.md",
            representation_kind=RepresentationKind.NORMALIZED,
        ),
        context,
    )

    assert [item.asset.id for item in result.items] == ["asset-guide"]
    assert result.items[0].representations[0].kind == RepresentationKind.NORMALIZED
    assert result.items[0].metadata_records[0].confirmed is True


def test_asset_retrieval_invalid_query_returns_structured_diagnostics() -> None:
    """Submit a request with six invalid fields and expect six diagnostics.

    The query must not succeed, must return no items, and must report one
    diagnostic code per invalid field; the serialized payload carries the
    failure flag and an "error" severity on the first diagnostic.
    """
    retrieval = AssetRetrievalService(InMemoryAssetRegistryRepository())
    result = retrieval.query_assets(
        AssetQueryRequest(
            lifecycle="missing",
            representation_kind="summary",
            sort_by="rank",
            sort_order="sideways",
            limit=0,
            offset=-1,
        ),
        operation_context(),
    )

    assert result.success is False
    assert result.total == 0
    assert result.items == ()
    # Compared as a set: the order in which diagnostics are emitted is not asserted.
    assert {diagnostic.code for diagnostic in result.diagnostics} == {
        "retrieval.lifecycle_invalid",
        "retrieval.representation_kind_invalid",
        "retrieval.sort_invalid",
        "retrieval.sort_order_invalid",
        "retrieval.limit_invalid",
        "retrieval.offset_invalid",
    }
    payload = result.to_dict()
    assert payload["success"] is False
    assert payload["diagnostics"][0]["severity"] == "error"


def test_asset_retrieval_lexical_search_over_normalized_content(tmp_path: Path) -> None:
    """Ingest two text files and run text queries against the refreshed index.

    Args:
        tmp_path: pytest-provided temporary directory that holds the two
            files to ingest.

    The query for "alpha" must return only the alpha asset with two matches;
    the query for "gamma" must come back empty and flagged as a zero result.
    """
    alpha = tmp_path / "alpha.txt"
    beta = tmp_path / "beta.txt"
    alpha.write_text("alpha retrieval signal\nalpha again\n", encoding="utf-8")
    beta.write_text("beta only\n", encoding="utf-8")
    repo = InMemoryAssetRegistryRepository()
    ingestion = AssetIngestionService(repo)
    retrieval = AssetRetrievalService(repo)
    context = operation_context()

    ingestion.ingest_file(alpha, context, asset_id="asset-alpha")
    ingestion.ingest_file(beta, context, asset_id="asset-beta")
    refresh = retrieval.refresh_index()
    result = retrieval.query_assets(AssetQueryRequest(text="alpha"), context)
    zero = retrieval.query_assets(AssetQueryRequest(text="gamma"), context)

    assert refresh.indexed_assets == 2
    assert refresh.indexed_representations == 2
    assert [item.asset.id for item in result.items] == ["asset-alpha"]
    assert result.items[0].relevance["strategy"] == "lexical_substring"
    assert result.items[0].relevance["match_count"] == 2
    assert result.items[0].relevance["representation_ids"]
    assert result.metadata["zero_result"] is False
    assert result.metadata["lexical_index"] == {
        "indexed_assets": 2,
        "indexed_representations": 2,
    }
    assert zero.total == 0
    assert zero.metadata["zero_result"] is True


def test_asset_retrieval_returns_permission_filtered_source_grounded_snippets() -> None:
    """Request snippets for "alpha" under a policy that denies confidential assets.

    Both a public and a confidential asset contain "alpha" in their
    representation search text. Only the public asset may be returned, its
    snippet must point back to the public source reference and carry the
    representation's provenance metadata, and the latest retrieval audit
    event must record exactly one permission-filtered asset.
    """
    repo = InMemoryAssetRegistryRepository()
    registry = AssetRegistryService(repo)
    retrieval = AssetRetrievalService(repo, policy_gateway=DenyConfidentialRetrievalPolicy())
    context = operation_context()
    public_source = SourceReference(source_system="repo", path="docs/public.md", checksum="sha256:public")
    confidential_source = SourceReference(
        source_system="repo",
        path="docs/confidential.md",
        checksum="sha256:confidential",
    )
    registry.create_asset(
        "Public Snippet",
        Classification(asset_type="document", sensitivity=Sensitivity.PUBLIC),
        context,
        asset_id="asset-public-snippet",
        source_refs=[public_source],
        representations=[
            AssetRepresentation.from_content(
                "asset-public-snippet",
                RepresentationKind.NORMALIZED,
                "text/markdown",
                "normalized public",
                source_ref_id=public_source.id,
                metadata={
                    "search_text": "Public alpha signal that should be citeable from the normalized document.",
                    "extractor": "markitect-tool",
                    "markitect_selector": "section:intro",
                },
            )
        ],
    )
    registry.create_asset(
        "Confidential Snippet",
        Classification(asset_type="document", sensitivity=Sensitivity.CONFIDENTIAL),
        context,
        asset_id="asset-confidential-snippet",
        source_refs=[confidential_source],
        representations=[
            AssetRepresentation.from_content(
                "asset-confidential-snippet",
                RepresentationKind.NORMALIZED,
                "text/markdown",
                "normalized confidential",
                source_ref_id=confidential_source.id,
                metadata={
                    "search_text": "Confidential alpha signal must not leak as a snippet.",
                    "extractor": "markitect-tool",
                    "markitect_selector": "section:private",
                },
            )
        ],
    )

    retrieval.refresh_index()
    result = retrieval.query_assets(
        AssetQueryRequest(text="alpha", include_snippets=True, max_snippets=1, snippet_radius=12),
        context,
    )

    assert [item.asset.id for item in result.items] == ["asset-public-snippet"]
    assert result.items[0].relevance["snippet_count"] == 1
    snippet = result.items[0].snippets[0]
    assert snippet.asset_id == "asset-public-snippet"
    assert snippet.source_ref_id == public_source.id
    assert snippet.match_text == "alpha"
    assert "alpha" in snippet.text
    assert snippet.provenance["extractor"] == "markitect-tool"
    assert snippet.provenance["markitect_selector"] == "section:intro"
    assert result.to_dict()["results"][0]["snippets"][0]["source_ref_id"] == public_source.id
    # Take the most recent query audit event for this correlation id.
    retrieval_audit = [
        event
        for event in repo.list_audit_events(correlation_id="corr-retrieval")
        if event.operation == "retrieval.assets.query"
    ][-1]
    assert retrieval_audit.details["permission_filtered_count"] == 1


def test_asset_retrieval_filters_are_backend_portable_with_sqlite(tmp_path: Path) -> None:
    """Run a combined text + filter query against the SQLite-backed repository.

    Args:
        tmp_path: pytest-provided temporary directory in which the SQLite
            database file is created.

    Two "report" assets are stored; only "asset-report" satisfies the text,
    tag, collection, source and confirmed-metadata filters.
    """
    repo = SQLiteAssetRegistryRepository(tmp_path / "registry.sqlite")
    registry = AssetRegistryService(repo)
    retrieval = AssetRetrievalService(repo)
    context = operation_context()
    source_ref = SourceReference(source_system="local_file", path="docs/report.txt", checksum="sha256:report")
    registry.create_asset(
        "Report",
        Classification(
            asset_type="report",
            sensitivity=Sensitivity.INTERNAL,
            owner="Analytics",
            topics=("retrieval", "quarterly"),
            review_state="approved",
            metadata={"collection": "reports"},
        ),
        context,
        asset_id="asset-report",
        source_refs=[source_ref],
        representations=[
            AssetRepresentation.from_content(
                "asset-report",
                RepresentationKind.NORMALIZED,
                "text/plain",
                "quarterly retrieval report",
                metadata={"search_text": "quarterly retrieval report"},
            )
        ],
        metadata_records=[
            MetadataRecord("collection", "reports", confirmed=True),
            MetadataRecord("tags", ["finance", "retrieval"], confirmed=True),
        ],
    )
    registry.create_asset(
        "Other",
        Classification(asset_type="report", sensitivity=Sensitivity.INTERNAL, owner="Analytics"),
        context,
        asset_id="asset-other",
        metadata_records=[MetadataRecord("collection", "misc", confirmed=True)],
    )

    retrieval.refresh_index()
    result = retrieval.query_assets(
        AssetQueryRequest(
            text="retrieval",
            asset_type="report",
            sensitivity=Sensitivity.INTERNAL,
            owner="Analytics",
            tags=("finance", "retrieval"),
            collection="reports",
            source_system="local_file",
            source_path="docs/report.txt",
            metadata_filters={"collection": "reports"},
            confirmed_metadata_only=True,
        ),
        context,
    )

    assert [item.asset.id for item in result.items] == ["asset-report"]
    assert result.items[0].relevance["match_count"] == 1
    assert result.items[0].metadata_records[0].confirmed is True


def test_asset_retrieval_filters_by_context_entity_workflow_run_and_related_asset() -> None:
    """Filter assets by linked context entity, related asset and workflow run.

    Setup: "asset-policy" is linked to a PROJECT entity, "asset-implementation"
    is linked to a WORKFLOW_RUN entity and to the policy asset via
    "implements"; "asset-unrelated" has no links. Each of the three queries
    must return exactly the asset carrying the matching link.
    """
    repo = InMemoryAssetRegistryRepository()
    registry = AssetRegistryService(repo)
    retrieval = AssetRetrievalService(repo)
    context = operation_context()
    classification = Classification(asset_type="document", sensitivity=Sensitivity.INTERNAL)
    policy = registry.create_asset("Policy", classification, context, asset_id="asset-policy")
    implementation = registry.create_asset(
        "Implementation",
        classification,
        context,
        asset_id="asset-implementation",
    )
    registry.create_asset("Unrelated", classification, context, asset_id="asset-unrelated")
    project = ContextEntity(
        entity_type=ContextEntityType.PROJECT,
        name="Kontextual Engine",
        external_ref="project:kontextual",
        metadata={"phase": "mvp"},
        entity_id="entity-project-kontextual",
    )
    workflow_run = ContextEntity(
        entity_type=ContextEntityType.WORKFLOW_RUN,
        name="Initial ingestion",
        external_ref="workflow-run-42",
        entity_id="entity-workflow-run-42",
    )

    registry.link_asset_to_context_entity(
        policy.asset.id,
        project,
        "about_project",
        context,
        confidence=0.96,
        provenance={"producer": "test-fixture"},
    )
    registry.link_asset_to_context_entity(
        implementation.asset.id,
        workflow_run,
        "produced_by_run",
        context,
    )
    registry.link_asset_to_asset(
        implementation.asset.id,
        policy.asset.id,
        "implements",
        context,
        confidence=0.88,
        provenance={"basis": "workplan"},
    )

    project_result = retrieval.query_assets(
        AssetQueryRequest(
            context_entity_type=ContextEntityType.PROJECT,
            context_entity_name="Kontextual Engine",
            relationship_predicate="about_project",
        ),
        context,
    )
    related_result = retrieval.query_assets(
        AssetQueryRequest(
            related_asset_id=policy.asset.id,
            relationship_predicate="implements",
        ),
        context,
    )
    workflow_result = retrieval.query_assets(
        AssetQueryRequest(workflow_run_id="workflow-run-42"),
        context,
    )

    assert [item.asset.id for item in project_result.items] == ["asset-policy"]
    assert project_result.items[0].relationships[0].predicate == "about_project"
    assert project_result.items[0].relationships[0].confidence == 0.96
    assert project_result.items[0].context_entities[0].entity_id == "entity-project-kontextual"
    assert project_result.to_dict()["results"][0]["relationships"][0]["provenance"]["producer"] == "test-fixture"
    assert [item.asset.id for item in related_result.items] == ["asset-implementation"]
    assert related_result.items[0].relationships[0].target_id == "asset-policy"
    assert [item.asset.id for item in workflow_result.items] == ["asset-implementation"]
    assert workflow_result.items[0].context_entities[0].entity_type == ContextEntityType.WORKFLOW_RUN


def test_context_entity_and_relationship_queries_are_backend_portable_with_sqlite(tmp_path: Path) -> None:
    """Query context entities and relationships through the SQLite repository.

    Args:
        tmp_path: pytest-provided temporary directory in which the SQLite
            database file is created.

    A policy asset is linked to a CASE entity; the entity query must find the
    entity with its asset id and relationship count, and the relationship
    query must return the link created here with both endpoints resolved.
    """
    repo = SQLiteAssetRegistryRepository(tmp_path / "registry.sqlite")
    registry = AssetRegistryService(repo)
    retrieval = AssetRetrievalService(repo)
    context = operation_context()
    policy = registry.create_asset(
        "Policy",
        Classification(asset_type="policy", sensitivity=Sensitivity.INTERNAL),
        context,
        asset_id="asset-policy",
    )
    case = ContextEntity(
        entity_type=ContextEntityType.CASE,
        name="Migration Case",
        external_ref="case:migration",
        metadata={"priority": "high"},
        entity_id="entity-case-migration",
    )
    linked = registry.link_asset_to_context_entity(
        policy.asset.id,
        case,
        "about_case",
        context,
        confidence=0.73,
        provenance={"source": "test"},
    )

    entities = retrieval.query_context_entities(
        ContextEntityQueryRequest(
            # Passed as a plain string rather than a ContextEntityType member.
            entity_type="case",
            external_ref="case:migration",
            metadata_filters={"priority": "high"},
        ),
        context,
    )
    relationships = retrieval.query_relationships(
        RelationshipQueryRequest(
            context_entity_id="entity-case-migration",
            predicate="about_case",
            target_kind="context_entity",
        ),
        context,
    )

    assert entities.total == 1
    assert entities.items[0].asset_ids == ("asset-policy",)
    assert entities.items[0].relationship_count == 1
    assert relationships.total == 1
    assert relationships.items[0].relationship.relationship_id == linked.relationship.relationship_id
    assert relationships.items[0].source_asset.id == "asset-policy"
    assert relationships.items[0].target_entity.entity_id == "entity-case-migration"
    assert relationships.to_dict()["results"][0]["confidence"] == 0.73


def test_graph_retrieval_invalid_queries_return_structured_diagnostics() -> None:
    """Send invalid asset, context-entity and relationship queries.

    Each of the three query entry points must report failure and emit exactly
    the diagnostic codes that correspond to the invalid fields supplied.
    """
    retrieval = AssetRetrievalService(InMemoryAssetRegistryRepository())
    context = operation_context()

    asset_result = retrieval.query_assets(
        AssetQueryRequest(context_entity_type="planet", relationship_direction="sideways"),
        context,
    )
    entity_result = retrieval.query_context_entities(
        ContextEntityQueryRequest(entity_type="planet", sort_by="rank", limit=0),
        context,
    )
    relationship_result = retrieval.query_relationships(
        RelationshipQueryRequest(
            target_kind="memory_phase",
            direction="sideways",
            sort_order="diagonal",
            offset=-1,
        ),
        context,
    )

    # Codes are compared as sets: emission order is not part of the contract checked here.
    assert asset_result.success is False
    assert {diagnostic.code for diagnostic in asset_result.diagnostics} == {
        "retrieval.context_entity_type_invalid",
        "retrieval.relationship_direction_invalid",
    }
    assert entity_result.success is False
    assert {diagnostic.code for diagnostic in entity_result.diagnostics} == {
        "retrieval.context_entity_type_invalid",
        "retrieval.sort_invalid",
        "retrieval.limit_invalid",
    }
    assert relationship_result.success is False
    assert {diagnostic.code for diagnostic in relationship_result.diagnostics} == {
        "retrieval.relationship_target_kind_invalid",
        "retrieval.relationship_direction_invalid",
        "retrieval.sort_order_invalid",
        "retrieval.offset_invalid",
    }


def test_retrieval_policy_filters_assets_relationships_and_context_payloads() -> None:
    """Check that a confidential asset is hidden from all three query types.

    A public and a confidential asset are both linked to the same CASE
    entity, and the public asset references the confidential one. Under
    DenyConfidentialRetrievalPolicy the asset query returns only the public
    asset, the "references" relationship is not returned, the entity payload
    lists only the public asset, and the audit event records a partial
    outcome with one filtered asset.
    """
    repo = InMemoryAssetRegistryRepository()
    registry = AssetRegistryService(repo)
    retrieval = AssetRetrievalService(repo, policy_gateway=DenyConfidentialRetrievalPolicy())
    context = operation_context()
    public = registry.create_asset(
        "Public",
        Classification(asset_type="document", sensitivity=Sensitivity.PUBLIC),
        context,
        asset_id="asset-public",
    )
    confidential = registry.create_asset(
        "Confidential",
        Classification(asset_type="document", sensitivity=Sensitivity.CONFIDENTIAL),
        context,
        asset_id="asset-confidential",
    )
    case = ContextEntity(
        entity_type=ContextEntityType.CASE,
        name="Visible Case",
        entity_id="entity-visible-case",
    )
    registry.link_asset_to_context_entity(public.asset.id, case, "about_case", context)
    registry.link_asset_to_context_entity(confidential.asset.id, case, "about_case", context)
    registry.link_asset_to_asset(public.asset.id, confidential.asset.id, "references", context)

    assets = retrieval.query_assets(AssetQueryRequest(sort_by="asset_id"), context)
    relationships = retrieval.query_relationships(
        RelationshipQueryRequest(predicate="references"),
        context,
    )
    entities = retrieval.query_context_entities(
        ContextEntityQueryRequest(entity_id="entity-visible-case"),
        context,
    )

    assert [item.asset.id for item in assets.items] == ["asset-public"]
    assert assets.total == 1
    assert assets.metadata["policy_enforced"] is True
    assert relationships.total == 0
    assert entities.items[0].asset_ids == ("asset-public",)
    assert entities.items[0].relationship_count == 1
    audit_events = repo.list_audit_events(correlation_id="corr-retrieval")
    # Take the most recent asset-query audit event.
    retrieval_audit = [event for event in audit_events if event.operation == "retrieval.assets.query"][-1]
    assert retrieval_audit.outcome.value == "partial"
    assert retrieval_audit.details["permission_filtered_count"] == 1
    assert retrieval_audit.policy_decision.action == "retrieval.assets.query"


def test_retrieval_scope_policy_fail_closed_returns_empty_denied_envelope() -> None:
    """Query assets through a policy gateway that raises for the query action.

    BrokenRetrievalPolicy raises RuntimeError for "retrieval.assets.query".
    The service must answer with an unsuccessful, empty envelope carrying a
    single "retrieval.permission_denied" diagnostic whose policy decision is
    "fail_closed", and the last audit event must record the denial.
    """
    repo = InMemoryAssetRegistryRepository()
    registry = AssetRegistryService(repo)
    retrieval = AssetRetrievalService(repo, policy_gateway=BrokenRetrievalPolicy())
    context = operation_context()
    registry.create_asset(
        "Public",
        Classification(asset_type="document", sensitivity=Sensitivity.PUBLIC),
        context,
        asset_id="asset-public",
    )

    result = retrieval.query_assets(AssetQueryRequest(), context)

    assert result.success is False
    assert result.total == 0
    assert result.items == ()
    assert [diagnostic.code for diagnostic in result.diagnostics] == ["retrieval.permission_denied"]
    decision = result.diagnostics[0].details["policy_decision"]
    assert decision["effect"] == "fail_closed"
    audit_event = repo.list_audit_events(correlation_id="corr-retrieval")[-1]
    assert audit_event.operation == "retrieval.assets.query"
    assert audit_event.outcome.value == "denied"
    assert audit_event.policy_decision.effect.value == "fail_closed"


def test_retrieval_feedback_persists_and_quality_metrics_use_feedback_and_audit(tmp_path: Path) -> None:
    """Record feedback on a query result and read it back via a new service.

    Args:
        tmp_path: pytest-provided temporary directory in which the SQLite
            database file is created.

    Feedback is recorded through one service instance, then listed and
    aggregated through a second instance opened on the same database file,
    so the assertions cover persistence as well as the computed metrics.
    """
    db_path = tmp_path / "registry.sqlite"
    repo = SQLiteAssetRegistryRepository(db_path)
    registry = AssetRegistryService(repo)
    retrieval = AssetRetrievalService(repo)
    context = operation_context()
    source = SourceReference(source_system="repo", path="docs/feedback.md", checksum="sha256:feedback")
    registry.create_asset(
        "Feedback Source",
        Classification(asset_type="document", sensitivity=Sensitivity.PUBLIC),
        context,
        asset_id="asset-feedback",
        source_refs=[source],
        representations=[
            AssetRepresentation.from_content(
                "asset-feedback",
                RepresentationKind.NORMALIZED,
                "text/markdown",
                "normalized feedback",
                source_ref_id=source.id,
                metadata={"search_text": "alpha feedback citation target", "extractor": "plain-text"},
            )
        ],
    )

    retrieval.refresh_index()
    query_result = retrieval.query_assets(
        AssetQueryRequest(text="alpha", include_snippets=True),
        context,
    )
    feedback = retrieval.record_feedback(
        RetrievalFeedbackRequest(
            label=RetrievalFeedbackLabel.USEFUL,
            query=query_result.request.to_dict(),
            result_ref={
                "asset_id": "asset-feedback",
                "rank": 1,
                "representation_id": query_result.items[0].representations[0].representation_id,
                "source_ref_id": source.id,
            },
            metadata={"citation": True},
        ),
        context,
    )

    # A second repository/service pair on the same file reads what the first one wrote.
    reloaded = AssetRetrievalService(SQLiteAssetRegistryRepository(db_path))
    records = reloaded.list_feedback(correlation_id="corr-retrieval")
    metrics = reloaded.quality_metrics(query_results=(query_result,), precision_at_k=1)

    assert feedback.success is True
    assert records[0].feedback_id == feedback.record.feedback_id
    assert records[0].query["text"] == "alpha"
    assert records[0].result_ref["asset_id"] == "asset-feedback"
    assert metrics.feedback_count == 1
    assert metrics.useful_count == 1
    assert metrics.zero_result_rate == 0.0
    assert metrics.precision_at_k == 1.0
    assert metrics.citation_precision == 1.0
    assert metrics.permission_filter_observation_count >= 1
    assert metrics.average_permission_filter_duration_ms is not None


def test_retrieval_feedback_invalid_label_returns_structured_diagnostic() -> None:
    """Record feedback with the unknown label "maybe".

    The call must report failure, produce no record, and emit the single
    diagnostic code "retrieval.feedback_label_invalid".
    """
    retrieval = AssetRetrievalService(InMemoryAssetRegistryRepository())
    result = retrieval.record_feedback(
        RetrievalFeedbackRequest(label="maybe", query={}, result_ref={}),
        operation_context(),
    )

    assert result.success is False
    assert result.record is None
    assert [diagnostic.code for diagnostic in result.diagnostics] == ["retrieval.feedback_label_invalid"]


def operation_context() -> OperationContext:
    """Build the operation context shared by the tests in this module.

    Returns:
        An OperationContext for a human actor "user-retrieval" in the
        "engineering" group, tagged with correlation id "corr-retrieval"
        (the id the tests use when listing audit events and feedback).
    """
    actor = Actor.create(
        ActorType.HUMAN,
        actor_id="user-retrieval",
        display_name="Retrieval Tester",
        groups=["engineering"],
    )
    return OperationContext.create(actor, correlation_id="corr-retrieval")


class DenyConfidentialRetrievalPolicy:
    """Test policy gateway that denies retrieval of confidential resources.

    Every other action/resource combination is allowed.
    """

    def authorize(
        self,
        context: OperationContext,
        action: str,
        resource: str,
        *,
        resource_metadata: dict | None = None,
    ) -> PolicyDecision:
        """Decide whether ``action`` on ``resource`` is permitted.

        Args:
            context: Operation context; only ``context.actor.id`` is read.
            action: Action name being authorized.
            resource: Identifier of the resource being accessed.
            resource_metadata: Optional metadata of the resource; a missing
                value is treated as an empty mapping.

        Returns:
            A fail-closed decision when ``action`` is "asset.retrieve" and the
            metadata's "sensitivity" equals ``Sensitivity.CONFIDENTIAL.value``;
            an allow decision otherwise.
        """
        resource_metadata = resource_metadata or {}
        if action == "asset.retrieve" and resource_metadata.get("sensitivity") == Sensitivity.CONFIDENTIAL.value:
            return PolicyDecision.fail_closed(
                context.actor.id,
                action,
                resource,
                reason="confidential retrieval denied in test policy",
                context={"resource_metadata": resource_metadata},
            )
        return PolicyDecision.allow(
            context.actor.id,
            action,
            resource,
            context={"resource_metadata": resource_metadata},
        )


class BrokenRetrievalPolicy:
    """Test policy gateway that raises when asked to authorize an asset query."""

    def authorize(
        self,
        context: OperationContext,
        action: str,
        resource: str,
        *,
        resource_metadata: dict | None = None,
    ) -> PolicyDecision:
        """Raise for the asset-query action; allow anything else.

        Args:
            context: Operation context; only ``context.actor.id`` is read.
            action: Action name being authorized.
            resource: Identifier of the resource being accessed.
            resource_metadata: Accepted for interface compatibility; unused.

        Returns:
            An allow decision for every action other than
            "retrieval.assets.query".

        Raises:
            RuntimeError: When ``action`` is "retrieval.assets.query".
        """
        if action == "retrieval.assets.query":
            raise RuntimeError("policy context unavailable")
        return PolicyDecision.allow(context.actor.id, action, resource)