diff --git a/README.md b/README.md index ca4b4a9..b6201ab 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ Start here: - `docs/knowledge-operations-roadmap.md` - `docs/architecture-blueprint.md` - `docs/architecture-core-implementation.md` +- `docs/asset-registry-implementation.md` - `docs/stack-decision.md` - `docs/markitect-main-scope-assessment.md` - `docs/markitect-tool-reuse-boundary.md` diff --git a/docs/architecture-core-implementation.md b/docs/architecture-core-implementation.md index 986b246..d8180bd 100644 --- a/docs/architecture-core-implementation.md +++ b/docs/architecture-core-implementation.md @@ -87,7 +87,6 @@ SQLite, Markitect, LLM providers, or source-system SDKs. ## Next Implementation Boundary -The next workplan should build on these contracts rather than reusing the old -artifact model directly. The natural next step is `KONT-WP-0005`: asset -registry governance and durable state. - +The next workplan builds on these contracts rather than reusing the old +artifact model directly. The first `KONT-WP-0005` slice is recorded in +`docs/asset-registry-implementation.md`. diff --git a/docs/asset-registry-implementation.md b/docs/asset-registry-implementation.md new file mode 100644 index 0000000..84aac36 --- /dev/null +++ b/docs/asset-registry-implementation.md @@ -0,0 +1,83 @@ +# Asset Registry Implementation Note + +Date: 2026-05-05 + +Status: first implementation slice for `KONT-WP-0005`. + +## Purpose + +This note records the first governed asset registry implementation built on the +architecture core. It establishes the service/repository boundary needed before +durable ingestion, retrieval, transformation, and agent operations depend on +asset state. + +## Implemented Package Shape + +```text +src/kontextual_engine/ + ports/ + policy.py + repositories.py + services/ + asset_service.py + adapters/ + memory/asset_registry.py + sqlite/asset_registry.py +``` + +The service depends on engine-owned ports and domain core contracts. The memory +and SQLite repositories are adapters behind those ports. + +## Implemented Capabilities + +- Stable `KnowledgeAsset` creation with explicit source references. +- Separate source, normalized, and derived `AssetRepresentation` records. +- `MetadataRecord` persistence with inferred/confirmed semantics preserved. +- Actor and `OperationContext` required for material mutations. +- Policy gateway authorization before asset mutations. +- Fail-closed policy denial through `AuthorizationError`. +- Audit events for create, metadata update, representation update, lifecycle + transition, and denied mutations. +- Asset version records for create, content/representation changes, metadata + changes, and lifecycle changes. +- In-memory repository for deterministic tests. +- SQLite repository for local-first durable asset registry state. +- SQLite foreign-key enforcement for representation and metadata asset + references. + +## Current SQLite Tables + +- `actors` +- `assets` +- `representations` +- `metadata_records` +- `asset_versions` +- `audit_events` + +Payloads are stored as compact JSON envelopes while indexed columns carry +stable lookup fields such as asset ID, lifecycle, representation kind, digest, +sequence, actor ID, target, and correlation ID. + +## Not Yet Implemented + +- Full custom metadata schema validation. +- Relationship persistence in the new core registry. +- Policy assignment storage and enterprise policy adapters. +- Idempotency-key persistence for mutation deduplication. +- Conflict detection beyond version-sequence uniqueness. +- Restore and supersession service operations. +- Batch partial-failure envelopes. + +These remain in scope for later `KONT-WP-0005` tasks or adjacent workplans. + +## Test Coverage + +`tests/test_asset_registry.py` covers: + +- asset creation with source reference, representation, metadata, version, and + audit output, +- lifecycle denial with fail-closed policy and denied audit event, +- SQLite reload preserving asset lifecycle, representation, metadata, versions, + and audit history, +- SQLite referential integrity for representation asset references. + diff --git a/src/kontextual_engine/__init__.py b/src/kontextual_engine/__init__.py index 3f82c88..e6993b2 100644 --- a/src/kontextual_engine/__init__.py +++ b/src/kontextual_engine/__init__.py @@ -11,6 +11,8 @@ from .artifacts import ( bundle_digest, content_digest, ) +from .adapters.memory import InMemoryAssetRegistryRepository +from .adapters.sqlite import SQLiteAssetRegistryRepository from .context import ContextAssembler, ContextItem, ContextPackage from .core import ( Actor, @@ -38,6 +40,7 @@ from .core import ( ) from .errors import ( AdapterUnavailableError, + AuthorizationError, Diagnostic, DuplicateResourceError, KontextualError, @@ -45,8 +48,10 @@ from .errors import ( ValidationError, ) from .ingestion import IngestionRequest, IngestionResult, IngestionService +from .ports import AllowAllPolicyGateway, AssetRegistryRepository, PolicyGateway from .query import QueryEngine, QueryResult from .relationships import RelationshipGraph +from .services import AssetChangeResult, AssetRegistryService from .storage import InMemoryKnowledgeRepository from .workflows import ( InputBundle, @@ -60,6 +65,7 @@ from .workflows import ( __all__ = [ "__version__", "AdapterUnavailableError", + "AllowAllPolicyGateway", "Artifact", "ArtifactMetadata", "ArtifactReference", @@ -67,9 +73,13 @@ __all__ = [ "Actor", "ActorType", "AssetRepresentation", + "AssetChangeResult", + "AssetRegistryRepository", + "AssetRegistryService", "AssetVersion", "AuditEvent", "AuditOutcome", + "AuthorizationError", "Classification", "Collection", "ContextAssembler", @@ -81,6 +91,7 @@ __all__ = [ "DerivedArtifactLineage", "Diagnostic", "DuplicateResourceError", + "InMemoryAssetRegistryRepository", "InMemoryKnowledgeRepository", "IngestionRequest", "IngestionResult", @@ -94,6 +105,7 @@ __all__ = [ "OperationRun", "OperationStage", "OperationContext", + "PolicyGateway", "PolicyDecision", "PolicyEffect", "QueryEngine", @@ -107,6 +119,7 @@ __all__ = [ "RunStatus", "Sensitivity", "SourceReference", + "SQLiteAssetRegistryRepository", "ValidationError", "VersionChangeType", "WorkflowStep", diff --git a/src/kontextual_engine/adapters/__init__.py b/src/kontextual_engine/adapters/__init__.py new file mode 100644 index 0000000..9d0b864 --- /dev/null +++ b/src/kontextual_engine/adapters/__init__.py @@ -0,0 +1,2 @@ +"""Infrastructure adapters for engine-owned ports.""" + diff --git a/src/kontextual_engine/adapters/memory/__init__.py b/src/kontextual_engine/adapters/memory/__init__.py new file mode 100644 index 0000000..86c66f2 --- /dev/null +++ b/src/kontextual_engine/adapters/memory/__init__.py @@ -0,0 +1,6 @@ +"""In-memory adapters for deterministic tests.""" + +from .asset_registry import InMemoryAssetRegistryRepository + +__all__ = ["InMemoryAssetRegistryRepository"] + diff --git a/src/kontextual_engine/adapters/memory/asset_registry.py b/src/kontextual_engine/adapters/memory/asset_registry.py new file mode 100644 index 0000000..93c0041 --- /dev/null +++ b/src/kontextual_engine/adapters/memory/asset_registry.py @@ -0,0 +1,130 @@ +"""In-memory asset registry repository.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Iterable + +from kontextual_engine.core import ( + Actor, + AssetRepresentation, + AssetVersion, + AuditEvent, + KnowledgeAsset, + LifecycleState, + MetadataRecord, + RepresentationKind, +) +from kontextual_engine.errors import NotFoundError, ValidationError + + +@dataclass +class InMemoryAssetRegistryRepository: + actors: dict[str, Actor] = field(default_factory=dict) + assets: dict[str, KnowledgeAsset] = field(default_factory=dict) + representations: dict[str, AssetRepresentation] = field(default_factory=dict) + metadata_records: dict[str, list[MetadataRecord]] = field(default_factory=dict) + versions: dict[str, list[AssetVersion]] = field(default_factory=dict) + audit_events: dict[str, AuditEvent] = field(default_factory=dict) + + def save_actor(self, actor: Actor) -> Actor: + self.actors[actor.id] = actor + return actor + + def get_actor(self, actor_id: str) -> Actor: + try: + return self.actors[actor_id] + except KeyError as exc: + raise NotFoundError("Actor not found", details={"actor_id": actor_id}) from exc + + def save_asset(self, asset: KnowledgeAsset) -> KnowledgeAsset: + self.assets[asset.id] = asset + return asset + + def get_asset(self, asset_id: str) -> KnowledgeAsset: + try: + return self.assets[asset_id] + except KeyError as exc: + raise NotFoundError("Asset not found", details={"asset_id": asset_id}) from exc + + def list_assets( + self, + *, + lifecycle: LifecycleState | None = None, + asset_type: str | None = None, + ) -> list[KnowledgeAsset]: + assets: Iterable[KnowledgeAsset] = self.assets.values() + if lifecycle is not None: + assets = [asset for asset in assets if asset.lifecycle == lifecycle] + if asset_type is not None: + assets = [asset for asset in assets if asset.classification.asset_type == asset_type] + return sorted(assets, key=lambda asset: (asset.title, asset.id)) + + def save_representation(self, representation: AssetRepresentation) -> AssetRepresentation: + self.get_asset(representation.asset_id) + self.representations[representation.representation_id] = representation + return representation + + def get_representation(self, representation_id: str) -> AssetRepresentation: + try: + return self.representations[representation_id] + except KeyError as exc: + raise NotFoundError( + "Representation not found", + details={"representation_id": representation_id}, + ) from exc + + def list_representations( + self, + *, + asset_id: str | None = None, + kind: RepresentationKind | None = None, + ) -> list[AssetRepresentation]: + representations: Iterable[AssetRepresentation] = self.representations.values() + if asset_id is not None: + representations = [item for item in representations if item.asset_id == asset_id] + if kind is not None: + representations = [item for item in representations if item.kind == kind] + return sorted(representations, key=lambda item: (item.asset_id, item.kind.value, item.representation_id)) + + def save_metadata_record(self, asset_id: str, record: MetadataRecord) -> MetadataRecord: + self.get_asset(asset_id) + self.metadata_records.setdefault(asset_id, []).append(record) + return record + + def list_metadata_records(self, asset_id: str) -> list[MetadataRecord]: + self.get_asset(asset_id) + return list(self.metadata_records.get(asset_id, [])) + + def save_version(self, version: AssetVersion) -> AssetVersion: + self.get_asset(version.asset_id) + current = self.versions.setdefault(version.asset_id, []) + if any(existing.sequence == version.sequence for existing in current): + raise ValidationError( + "Version sequence already exists for asset", + details={"asset_id": version.asset_id, "sequence": version.sequence}, + ) + current.append(version) + return version + + def list_versions(self, asset_id: str) -> list[AssetVersion]: + self.get_asset(asset_id) + return sorted(self.versions.get(asset_id, []), key=lambda version: version.sequence) + + def save_audit_event(self, event: AuditEvent) -> AuditEvent: + self.audit_events[event.event_id] = event + return event + + def list_audit_events( + self, + *, + target: str | None = None, + correlation_id: str | None = None, + ) -> list[AuditEvent]: + events: Iterable[AuditEvent] = self.audit_events.values() + if target is not None: + events = [event for event in events if event.target == target] + if correlation_id is not None: + events = [event for event in events if event.correlation_id == correlation_id] + return sorted(events, key=lambda event: (event.occurred_at, event.event_id)) + diff --git a/src/kontextual_engine/adapters/sqlite/__init__.py b/src/kontextual_engine/adapters/sqlite/__init__.py new file mode 100644 index 0000000..1b1f7c2 --- /dev/null +++ b/src/kontextual_engine/adapters/sqlite/__init__.py @@ -0,0 +1,6 @@ +"""SQLite adapters for local-first durability.""" + +from .asset_registry import SQLiteAssetRegistryRepository + +__all__ = ["SQLiteAssetRegistryRepository"] + diff --git a/src/kontextual_engine/adapters/sqlite/asset_registry.py b/src/kontextual_engine/adapters/sqlite/asset_registry.py new file mode 100644 index 0000000..4b2cc44 --- /dev/null +++ b/src/kontextual_engine/adapters/sqlite/asset_registry.py @@ -0,0 +1,338 @@ +"""SQLite asset registry repository.""" + +from __future__ import annotations + +import json +import sqlite3 +from pathlib import Path +from typing import Any + +from kontextual_engine.core import ( + Actor, + AssetRepresentation, + AssetVersion, + AuditEvent, + KnowledgeAsset, + LifecycleState, + MetadataRecord, + RepresentationKind, +) +from kontextual_engine.errors import NotFoundError, ValidationError + + +class SQLiteAssetRegistryRepository: + def __init__(self, path: str | Path) -> None: + self.path = Path(path) + self.path.parent.mkdir(parents=True, exist_ok=True) + self._initialize() + + def save_actor(self, actor: Actor) -> Actor: + with self._connect() as conn: + conn.execute( + """ + insert into actors (id, actor_type, payload) + values (?, ?, ?) + on conflict(id) do update set + actor_type=excluded.actor_type, + payload=excluded.payload + """, + (actor.id, actor.actor_type.value, _json(actor.to_dict())), + ) + return actor + + def get_actor(self, actor_id: str) -> Actor: + row = self._one("select payload from actors where id = ?", (actor_id,)) + if row is None: + raise NotFoundError("Actor not found", details={"actor_id": actor_id}) + return Actor.from_dict(_loads(row["payload"])) + + def save_asset(self, asset: KnowledgeAsset) -> KnowledgeAsset: + with self._connect() as conn: + conn.execute( + """ + insert into assets (id, title, asset_type, lifecycle, payload) + values (?, ?, ?, ?, ?) + on conflict(id) do update set + title=excluded.title, + asset_type=excluded.asset_type, + lifecycle=excluded.lifecycle, + payload=excluded.payload + """, + ( + asset.id, + asset.title, + asset.classification.asset_type, + asset.lifecycle.value, + _json(asset.to_dict()), + ), + ) + return asset + + def get_asset(self, asset_id: str) -> KnowledgeAsset: + row = self._one("select payload from assets where id = ?", (asset_id,)) + if row is None: + raise NotFoundError("Asset not found", details={"asset_id": asset_id}) + return KnowledgeAsset.from_dict(_loads(row["payload"])) + + def list_assets( + self, + *, + lifecycle: LifecycleState | None = None, + asset_type: str | None = None, + ) -> list[KnowledgeAsset]: + clauses = [] + params: list[Any] = [] + if lifecycle is not None: + clauses.append("lifecycle = ?") + params.append(lifecycle.value) + if asset_type is not None: + clauses.append("asset_type = ?") + params.append(asset_type) + where = f" where {' and '.join(clauses)}" if clauses else "" + rows = self._all(f"select payload from assets{where} order by title, id", tuple(params)) + return [KnowledgeAsset.from_dict(_loads(row["payload"])) for row in rows] + + def save_representation(self, representation: AssetRepresentation) -> AssetRepresentation: + try: + with self._connect() as conn: + conn.execute( + """ + insert into representations (id, asset_id, kind, digest, payload) + values (?, ?, ?, ?, ?) + on conflict(id) do update set + asset_id=excluded.asset_id, + kind=excluded.kind, + digest=excluded.digest, + payload=excluded.payload + """, + ( + representation.representation_id, + representation.asset_id, + representation.kind.value, + representation.digest, + _json(representation.to_dict()), + ), + ) + except sqlite3.IntegrityError as exc: + raise ValidationError( + "Representation references an unknown asset", + details={ + "asset_id": representation.asset_id, + "representation_id": representation.representation_id, + }, + ) from exc + return representation + + def get_representation(self, representation_id: str) -> AssetRepresentation: + row = self._one("select payload from representations where id = ?", (representation_id,)) + if row is None: + raise NotFoundError( + "Representation not found", + details={"representation_id": representation_id}, + ) + return AssetRepresentation.from_dict(_loads(row["payload"])) + + def list_representations( + self, + *, + asset_id: str | None = None, + kind: RepresentationKind | None = None, + ) -> list[AssetRepresentation]: + clauses = [] + params: list[Any] = [] + if asset_id is not None: + clauses.append("asset_id = ?") + params.append(asset_id) + if kind is not None: + clauses.append("kind = ?") + params.append(kind.value) + where = f" where {' and '.join(clauses)}" if clauses else "" + rows = self._all( + f"select payload from representations{where} order by asset_id, kind, id", + tuple(params), + ) + return [AssetRepresentation.from_dict(_loads(row["payload"])) for row in rows] + + def save_metadata_record(self, asset_id: str, record: MetadataRecord) -> MetadataRecord: + try: + with self._connect() as conn: + conn.execute( + """ + insert into metadata_records (id, asset_id, key, payload) + values (?, ?, ?, ?) + on conflict(id) do update set + asset_id=excluded.asset_id, + key=excluded.key, + payload=excluded.payload + """, + (record.record_id, asset_id, record.key, _json(record.to_dict())), + ) + except sqlite3.IntegrityError as exc: + raise ValidationError( + "Metadata record references an unknown asset", + details={"asset_id": asset_id, "record_id": record.record_id}, + ) from exc + return record + + def list_metadata_records(self, asset_id: str) -> list[MetadataRecord]: + rows = self._all( + "select payload from metadata_records where asset_id = ? order by key, id", + (asset_id,), + ) + if not rows: + self.get_asset(asset_id) + return [MetadataRecord.from_dict(_loads(row["payload"])) for row in rows] + + def save_version(self, version: AssetVersion) -> AssetVersion: + try: + with self._connect() as conn: + conn.execute( + """ + insert into asset_versions (id, asset_id, sequence, change_type, payload) + values (?, ?, ?, ?, ?) + """, + ( + version.version_id, + version.asset_id, + version.sequence, + version.change_type.value, + _json(version.to_dict()), + ), + ) + except sqlite3.IntegrityError as exc: + raise ValidationError( + "Version sequence already exists for asset", + details={"asset_id": version.asset_id, "sequence": version.sequence}, + ) from exc + return version + + def list_versions(self, asset_id: str) -> list[AssetVersion]: + rows = self._all( + "select payload from asset_versions where asset_id = ? order by sequence", + (asset_id,), + ) + if not rows: + self.get_asset(asset_id) + return [AssetVersion.from_dict(_loads(row["payload"])) for row in rows] + + def save_audit_event(self, event: AuditEvent) -> AuditEvent: + with self._connect() as conn: + conn.execute( + """ + insert into audit_events (id, target, actor_id, correlation_id, outcome, occurred_at, payload) + values (?, ?, ?, ?, ?, ?, ?) + on conflict(id) do update set + target=excluded.target, + actor_id=excluded.actor_id, + correlation_id=excluded.correlation_id, + outcome=excluded.outcome, + occurred_at=excluded.occurred_at, + payload=excluded.payload + """, + ( + event.event_id, + event.target, + event.actor_id, + event.correlation_id, + event.outcome.value, + event.occurred_at, + _json(event.to_dict()), + ), + ) + return event + + def list_audit_events( + self, + *, + target: str | None = None, + correlation_id: str | None = None, + ) -> list[AuditEvent]: + clauses = [] + params: list[Any] = [] + if target is not None: + clauses.append("target = ?") + params.append(target) + if correlation_id is not None: + clauses.append("correlation_id = ?") + params.append(correlation_id) + where = f" where {' and '.join(clauses)}" if clauses else "" + rows = self._all(f"select payload from audit_events{where} order by occurred_at, id", tuple(params)) + return [AuditEvent.from_dict(_loads(row["payload"])) for row in rows] + + def _initialize(self) -> None: + with self._connect() as conn: + conn.executescript( + """ + create table if not exists actors ( + id text primary key, + actor_type text not null, + payload text not null + ); + create table if not exists assets ( + id text primary key, + title text not null, + asset_type text not null, + lifecycle text not null, + payload text not null + ); + create table if not exists representations ( + id text primary key, + asset_id text not null references assets(id) on delete cascade, + kind text not null, + digest text not null, + payload text not null + ); + create table if not exists metadata_records ( + id text primary key, + asset_id text not null references assets(id) on delete cascade, + key text not null, + payload text not null + ); + create table if not exists asset_versions ( + id text primary key, + asset_id text not null references assets(id) on delete cascade, + sequence integer not null, + change_type text not null, + payload text not null, + unique(asset_id, sequence) + ); + create table if not exists audit_events ( + id text primary key, + target text not null, + actor_id text not null, + correlation_id text not null, + outcome text not null, + occurred_at text not null, + payload text not null, + foreign key(actor_id) references actors(id) + ); + create index if not exists idx_assets_lifecycle on assets(lifecycle); + create index if not exists idx_representations_asset on representations(asset_id); + create index if not exists idx_metadata_asset on metadata_records(asset_id); + create index if not exists idx_versions_asset on asset_versions(asset_id); + create index if not exists idx_audit_target on audit_events(target); + create index if not exists idx_audit_correlation on audit_events(correlation_id); + """ + ) + + def _connect(self) -> sqlite3.Connection: + conn = sqlite3.connect(self.path) + conn.row_factory = sqlite3.Row + conn.execute("pragma foreign_keys = on") + return conn + + def _one(self, query: str, params: tuple[Any, ...]) -> sqlite3.Row | None: + with self._connect() as conn: + return conn.execute(query, params).fetchone() + + def _all(self, query: str, params: tuple[Any, ...]) -> list[sqlite3.Row]: + with self._connect() as conn: + return list(conn.execute(query, params).fetchall()) + + +def _json(value: dict[str, Any]) -> str: + return json.dumps(value, sort_keys=True, separators=(",", ":")) + + +def _loads(value: str) -> dict[str, Any]: + return json.loads(value) diff --git a/src/kontextual_engine/core/assets.py b/src/kontextual_engine/core/assets.py index 77f2f54..001588c 100644 --- a/src/kontextual_engine/core/assets.py +++ b/src/kontextual_engine/core/assets.py @@ -76,6 +76,22 @@ class AssetRepresentation: } ) + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "AssetRepresentation": + return cls( + representation_id=data["representation_id"], + asset_id=data["asset_id"], + kind=RepresentationKind(data["kind"]), + media_type=data["media_type"], + digest=data["digest"], + size_bytes=int(data["size_bytes"]), + storage_ref=data.get("storage_ref"), + producer=data.get("producer"), + source_ref_id=data.get("source_ref_id"), + metadata=dict(data.get("metadata", {})), + created_at=data["created_at"], + ) + @dataclass(frozen=True) class KnowledgeAsset: @@ -119,6 +135,9 @@ class KnowledgeAsset: return self return replace(self, aliases=self.aliases + (alias,), updated_at=utc_now().isoformat()) + def with_current_version(self, version_id: str) -> "KnowledgeAsset": + return replace(self, current_version_id=version_id, updated_at=utc_now().isoformat()) + def transition_lifecycle(self, lifecycle: LifecycleState | str) -> "KnowledgeAsset": lifecycle_state = LifecycleState(lifecycle) classification = replace(self.classification, lifecycle=lifecycle_state) @@ -145,3 +164,17 @@ class KnowledgeAsset: } ) + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "KnowledgeAsset": + return cls( + id=data["id"], + title=data["title"], + classification=Classification.from_dict(data["classification"]), + source_refs=tuple(SourceReference.from_dict(item) for item in data.get("source_refs", [])), + aliases=tuple(data.get("aliases", [])), + current_version_id=data.get("current_version_id"), + lifecycle=LifecycleState(data.get("lifecycle", LifecycleState.ACTIVE.value)), + metadata=dict(data.get("metadata", {})), + created_at=data["created_at"], + updated_at=data["updated_at"], + ) diff --git a/src/kontextual_engine/core/audit.py b/src/kontextual_engine/core/audit.py index 1865051..64e91e3 100644 --- a/src/kontextual_engine/core/audit.py +++ b/src/kontextual_engine/core/audit.py @@ -70,3 +70,18 @@ class AuditEvent: } ) + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "AuditEvent": + return cls( + event_id=data["event_id"], + operation=data["operation"], + target=data["target"], + outcome=AuditOutcome(data["outcome"]), + actor_id=data["actor_id"], + correlation_id=data["correlation_id"], + policy_decision=PolicyDecision.from_dict(data["policy_decision"]) + if data.get("policy_decision") + else None, + details=dict(data.get("details", {})), + occurred_at=data["occurred_at"], + ) diff --git a/src/kontextual_engine/core/metadata.py b/src/kontextual_engine/core/metadata.py index c3707a3..2ce5e9f 100644 --- a/src/kontextual_engine/core/metadata.py +++ b/src/kontextual_engine/core/metadata.py @@ -48,6 +48,18 @@ class Classification: } ) + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "Classification": + return cls( + asset_type=data["asset_type"], + sensitivity=Sensitivity(data.get("sensitivity", Sensitivity.INTERNAL.value)), + lifecycle=LifecycleState(data.get("lifecycle", LifecycleState.ACTIVE.value)), + topics=tuple(data.get("topics", [])), + owner=data.get("owner"), + review_state=data.get("review_state"), + metadata=dict(data.get("metadata", {})), + ) + @dataclass(frozen=True) class MetadataRecord: @@ -72,3 +84,14 @@ class MetadataRecord: } ) + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "MetadataRecord": + return cls( + record_id=data["record_id"], + key=data["key"], + value=data.get("value"), + provenance=dict(data.get("provenance", {})), + confidence=data.get("confidence"), + confirmed=bool(data.get("confirmed", False)), + created_at=data["created_at"], + ) diff --git a/src/kontextual_engine/core/policy.py b/src/kontextual_engine/core/policy.py index 26d2001..716ce10 100644 --- a/src/kontextual_engine/core/policy.py +++ b/src/kontextual_engine/core/policy.py @@ -77,3 +77,16 @@ class PolicyDecision: } ) + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "PolicyDecision": + return cls( + decision_id=data["decision_id"], + effect=PolicyEffect(data["effect"]), + subject_id=data["subject_id"], + action=data["action"], + resource=data["resource"], + reason=data.get("reason", ""), + obligations=dict(data.get("obligations", {})), + context=dict(data.get("context", {})), + decided_at=data["decided_at"], + ) diff --git a/src/kontextual_engine/core/provenance.py b/src/kontextual_engine/core/provenance.py index 80dfb34..1fb215a 100644 --- a/src/kontextual_engine/core/provenance.py +++ b/src/kontextual_engine/core/provenance.py @@ -48,6 +48,19 @@ class SourceReference: } ) + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "SourceReference": + return cls( + id=data["id"], + source_system=data["source_system"], + path=data.get("path"), + uri=data.get("uri"), + external_id=data.get("external_id"), + checksum=data.get("checksum"), + connector_ref=data.get("connector_ref"), + metadata=dict(data.get("metadata", {})), + ) + class VersionChangeType(str, Enum): CREATED = "created" @@ -93,6 +106,23 @@ class AssetVersion: } ) + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "AssetVersion": + return cls( + version_id=data["version_id"], + asset_id=data["asset_id"], + sequence=int(data["sequence"]), + change_type=VersionChangeType(data["change_type"]), + representation_ids=tuple(data.get("representation_ids", [])), + actor_id=data.get("actor_id"), + operation_id=data.get("operation_id"), + parent_version_id=data.get("parent_version_id"), + metadata_delta=dict(data.get("metadata_delta", {})), + relationship_delta=dict(data.get("relationship_delta", {})), + lifecycle=data.get("lifecycle"), + created_at=data["created_at"], + ) + @dataclass(frozen=True) class DerivedArtifactLineage: @@ -129,4 +159,3 @@ class DerivedArtifactLineage: if include_hash: data["lineage_hash"] = self.lineage_hash return data - diff --git a/src/kontextual_engine/errors.py b/src/kontextual_engine/errors.py index 7f254c4..26e0f3b 100644 --- a/src/kontextual_engine/errors.py +++ b/src/kontextual_engine/errors.py @@ -54,6 +54,9 @@ class ValidationError(KontextualError): code = "kontextual.validation" +class AuthorizationError(KontextualError): + code = "kontextual.authorization" + + class AdapterUnavailableError(KontextualError): code = "kontextual.adapter_unavailable" - diff --git a/src/kontextual_engine/ports/__init__.py b/src/kontextual_engine/ports/__init__.py new file mode 100644 index 0000000..b3d3e16 --- /dev/null +++ b/src/kontextual_engine/ports/__init__.py @@ -0,0 +1,11 @@ +"""Stable ports owned by the engine.""" + +from .policy import AllowAllPolicyGateway, PolicyGateway +from .repositories import AssetRegistryRepository + +__all__ = [ + "AllowAllPolicyGateway", + "AssetRegistryRepository", + "PolicyGateway", +] + diff --git a/src/kontextual_engine/ports/policy.py b/src/kontextual_engine/ports/policy.py new file mode 100644 index 0000000..99cdcce --- /dev/null +++ b/src/kontextual_engine/ports/policy.py @@ -0,0 +1,38 @@ +"""Policy decision ports for application services.""" + +from __future__ import annotations + +from typing import Any, Protocol + +from kontextual_engine.core import OperationContext, PolicyDecision + + +class PolicyGateway(Protocol): + def authorize( + self, + context: OperationContext, + action: str, + resource: str, + *, + resource_metadata: dict[str, Any] | None = None, + ) -> PolicyDecision: ... + + +class AllowAllPolicyGateway: + """Deterministic default for local development and tests.""" + + def authorize( + self, + context: OperationContext, + action: str, + resource: str, + *, + resource_metadata: dict[str, Any] | None = None, + ) -> PolicyDecision: + return PolicyDecision.allow( + context.actor.id, + action, + resource, + context={"gateway": "allow-all", "resource_metadata": resource_metadata or {}}, + ) + diff --git a/src/kontextual_engine/ports/repositories.py b/src/kontextual_engine/ports/repositories.py new file mode 100644 index 0000000..90758c6 --- /dev/null +++ b/src/kontextual_engine/ports/repositories.py @@ -0,0 +1,54 @@ +"""Repository ports for governed asset registry state.""" + +from __future__ import annotations + +from typing import Protocol + +from kontextual_engine.core import ( + Actor, + AssetRepresentation, + AssetVersion, + AuditEvent, + KnowledgeAsset, + LifecycleState, + MetadataRecord, + RepresentationKind, +) + + +class AssetRegistryRepository(Protocol): + def save_actor(self, actor: Actor) -> Actor: ... + def get_actor(self, actor_id: str) -> Actor: ... + + def save_asset(self, asset: KnowledgeAsset) -> KnowledgeAsset: ... + def get_asset(self, asset_id: str) -> KnowledgeAsset: ... + def list_assets( + self, + *, + lifecycle: LifecycleState | None = None, + asset_type: str | None = None, + ) -> list[KnowledgeAsset]: ... + + def save_representation(self, representation: AssetRepresentation) -> AssetRepresentation: ... + def get_representation(self, representation_id: str) -> AssetRepresentation: ... + def list_representations( + self, + *, + asset_id: str | None = None, + kind: RepresentationKind | None = None, + ) -> list[AssetRepresentation]: ... + + def save_metadata_record(self, asset_id: str, record: MetadataRecord) -> MetadataRecord: ... + def list_metadata_records(self, asset_id: str) -> list[MetadataRecord]: ... + + def save_version(self, version: AssetVersion) -> AssetVersion: ... + def list_versions(self, asset_id: str) -> list[AssetVersion]: ... + + def save_audit_event(self, event: AuditEvent) -> AuditEvent: ... + def list_audit_events( + self, + *, + target: str | None = None, + correlation_id: str | None = None, + ) -> list[AuditEvent]: ... + diff --git a/src/kontextual_engine/services/__init__.py b/src/kontextual_engine/services/__init__.py new file mode 100644 index 0000000..335b84a --- /dev/null +++ b/src/kontextual_engine/services/__init__.py @@ -0,0 +1,6 @@ +"""Application services for the engine.""" + +from .asset_service import AssetChangeResult, AssetRegistryService + +__all__ = ["AssetChangeResult", "AssetRegistryService"] + diff --git a/src/kontextual_engine/services/asset_service.py b/src/kontextual_engine/services/asset_service.py new file mode 100644 index 0000000..db151e6 --- /dev/null +++ b/src/kontextual_engine/services/asset_service.py @@ -0,0 +1,260 @@ +"""Application service for governed knowledge asset registry operations.""" + +from __future__ import annotations + +from dataclasses import dataclass, replace + +from kontextual_engine.core import ( + AssetRepresentation, + AssetVersion, + AuditEvent, + AuditOutcome, + Classification, + KnowledgeAsset, + LifecycleState, + MetadataRecord, + OperationContext, + PolicyDecision, + SourceReference, + VersionChangeType, +) +from kontextual_engine.errors import AuthorizationError +from kontextual_engine.ports import AllowAllPolicyGateway, AssetRegistryRepository, PolicyGateway + + +@dataclass(frozen=True) +class AssetChangeResult: + asset: KnowledgeAsset + version: AssetVersion + audit_event: AuditEvent + policy_decision: PolicyDecision + + +class AssetRegistryService: + def __init__( + self, + repository: AssetRegistryRepository, + *, + policy_gateway: PolicyGateway | None = None, + ) -> None: + self.repository = repository + self.policy_gateway = policy_gateway or AllowAllPolicyGateway() + + def create_asset( + self, + title: str, + classification: Classification, + context: OperationContext, + *, + source_refs: list[SourceReference] | None = None, + representations: list[AssetRepresentation] | None = None, + metadata_records: list[MetadataRecord] | None = None, + asset_id: str | None = None, + ) -> AssetChangeResult: + asset = KnowledgeAsset.create( + title, + classification, + asset_id=asset_id, + source_refs=source_refs, + ) + decision = self._authorize( + context, + "asset.create", + f"asset:{asset.id}", + resource_metadata={ + "asset_type": classification.asset_type, + "sensitivity": classification.sensitivity.value, + }, + ) + version = AssetVersion( + asset_id=asset.id, + sequence=1, + change_type=VersionChangeType.CREATED, + representation_ids=tuple(item.representation_id for item in representations or []), + actor_id=context.actor.id, + lifecycle=classification.lifecycle.value, + ) + asset = asset.with_current_version(version.version_id) + self.repository.save_actor(context.actor) + self.repository.save_asset(asset) + for representation in representations or []: + if representation.asset_id != asset.id: + representation = replace(representation, asset_id=asset.id) + self.repository.save_representation(representation) + for record in metadata_records or []: + self.repository.save_metadata_record(asset.id, record) + self.repository.save_version(version) + event = self._audit( + "asset.create", + f"asset:{asset.id}", + AuditOutcome.SUCCESS, + context, + decision, + details={"version_id": version.version_id}, + ) + return AssetChangeResult(asset, version, event, decision) + + def add_metadata_record( + self, + asset_id: str, + record: MetadataRecord, + context: OperationContext, + ) -> AssetChangeResult: + asset = self.repository.get_asset(asset_id) + decision = self._authorize(context, "asset.metadata.add", f"asset:{asset.id}") + next_sequence = self._next_sequence(asset.id) + self.repository.save_metadata_record(asset.id, record) + version = AssetVersion( + asset_id=asset.id, + sequence=next_sequence, + change_type=VersionChangeType.METADATA_CHANGED, + actor_id=context.actor.id, + parent_version_id=asset.current_version_id, + metadata_delta={record.key: record.value}, + lifecycle=asset.lifecycle.value, + ) + asset = asset.with_current_version(version.version_id) + self.repository.save_asset(asset) + self.repository.save_version(version) + event = self._audit( + "asset.metadata.add", + f"asset:{asset.id}", + AuditOutcome.SUCCESS, + context, + decision, + details={"record_id": record.record_id, "version_id": version.version_id}, + ) + return AssetChangeResult(asset, version, event, decision) + + def add_representation( + self, + asset_id: str, + representation: AssetRepresentation, + context: OperationContext, + ) -> AssetChangeResult: + asset = self.repository.get_asset(asset_id) + decision = self._authorize( + context, + "asset.representation.add", + f"asset:{asset.id}", + resource_metadata={"kind": representation.kind.value, "media_type": representation.media_type}, + ) + if representation.asset_id != asset.id: + representation = replace(representation, asset_id=asset.id) + self.repository.save_representation(representation) + version = AssetVersion( + asset_id=asset.id, + sequence=self._next_sequence(asset.id), + change_type=VersionChangeType.CONTENT_CHANGED, + representation_ids=(representation.representation_id,), + actor_id=context.actor.id, + parent_version_id=asset.current_version_id, + lifecycle=asset.lifecycle.value, + ) + asset = asset.with_current_version(version.version_id) + self.repository.save_asset(asset) + self.repository.save_version(version) + event = self._audit( + "asset.representation.add", + f"asset:{asset.id}", + AuditOutcome.SUCCESS, + context, + decision, + details={"representation_id": representation.representation_id, "version_id": version.version_id}, + ) + return AssetChangeResult(asset, version, event, decision) + + def transition_lifecycle( + self, + asset_id: str, + lifecycle: LifecycleState, + context: OperationContext, + ) -> AssetChangeResult: + asset = self.repository.get_asset(asset_id) + decision = self._authorize( + context, + "asset.lifecycle.transition", + f"asset:{asset.id}", + resource_metadata={"from": asset.lifecycle.value, "to": lifecycle.value}, + ) + updated = asset.transition_lifecycle(lifecycle) + version = AssetVersion( + asset_id=asset.id, + sequence=self._next_sequence(asset.id), + change_type=VersionChangeType.LIFECYCLE_CHANGED, + actor_id=context.actor.id, + parent_version_id=asset.current_version_id, + lifecycle=lifecycle.value, + metadata_delta={"lifecycle": {"from": asset.lifecycle.value, "to": lifecycle.value}}, + ) + updated = updated.with_current_version(version.version_id) + self.repository.save_asset(updated) + self.repository.save_version(version) + event = self._audit( + "asset.lifecycle.transition", + f"asset:{asset.id}", + AuditOutcome.SUCCESS, + context, + decision, + details={"version_id": version.version_id, "lifecycle": lifecycle.value}, + ) + return AssetChangeResult(updated, version, event, decision) + + def request_delete(self, asset_id: str, context: OperationContext) -> AssetChangeResult: + return self.transition_lifecycle(asset_id, LifecycleState.DELETE_REQUESTED, context) + + def get_asset(self, asset_id: str) -> KnowledgeAsset: + return self.repository.get_asset(asset_id) + + def _authorize( + self, + context: OperationContext, + action: str, + resource: str, + *, + resource_metadata: dict[str, str] | None = None, + ) -> PolicyDecision: + self.repository.save_actor(context.actor) + decision = self.policy_gateway.authorize( + context, + action, + resource, + resource_metadata=resource_metadata, + ) + if not decision.allowed: + self._audit(action, resource, AuditOutcome.DENIED, context, decision) + raise AuthorizationError( + "Operation denied by policy", + details={ + "action": action, + "resource": resource, + "correlation_id": context.correlation_id, + "policy_decision": decision.to_dict(), + }, + ) + return decision + + def _audit( + self, + operation: str, + target: str, + outcome: AuditOutcome, + context: OperationContext, + policy_decision: PolicyDecision, + *, + details: dict[str, str] | None = None, + ) -> AuditEvent: + event = AuditEvent.from_context( + operation, + target, + outcome, + context, + policy_decision=policy_decision, + details=details, + ) + return self.repository.save_audit_event(event) + + def _next_sequence(self, asset_id: str) -> int: + versions = self.repository.list_versions(asset_id) + return len(versions) + 1 + diff --git a/tests/test_asset_registry.py b/tests/test_asset_registry.py new file mode 100644 index 0000000..dcd77aa --- /dev/null +++ b/tests/test_asset_registry.py @@ -0,0 +1,182 @@ +from pathlib import Path + +import pytest + +from kontextual_engine import ( + Actor, + ActorType, + AssetRegistryService, + AssetRepresentation, + AuthorizationError, + Classification, + InMemoryAssetRegistryRepository, + LifecycleState, + MetadataRecord, + OperationContext, + PolicyDecision, + RepresentationKind, + Sensitivity, + SourceReference, + SQLiteAssetRegistryRepository, + ValidationError, +) + + +def test_asset_registry_service_creates_assets_with_versions_and_audit() -> None: + repo = InMemoryAssetRegistryRepository() + service = AssetRegistryService(repo) + context = operation_context() + source_ref = SourceReference( + source_system="repo", + path="docs/intent.md", + checksum="sha256:source", + ) + representation = AssetRepresentation.from_content( + "asset-intent", + RepresentationKind.SOURCE, + "text/markdown", + "# Intent\n", + storage_ref="object://intent-source", + source_ref_id=source_ref.id, + ) + metadata = MetadataRecord( + "topic", + "architecture", + provenance={"producer": "human"}, + confirmed=True, + ) + + result = service.create_asset( + "Intent", + Classification( + asset_type="document", + sensitivity=Sensitivity.INTERNAL, + owner="Platform Knowledge", + ), + context, + asset_id="asset-intent", + source_refs=[source_ref], + representations=[representation], + metadata_records=[metadata], + ) + + assert result.asset.id == "asset-intent" + assert result.asset.current_version_id == result.version.version_id + assert result.version.sequence == 1 + assert result.audit_event.outcome.value == "success" + assert result.policy_decision.allowed is True + assert repo.get_asset("asset-intent").source_refs[0].path == "docs/intent.md" + assert repo.list_representations(asset_id="asset-intent")[0].storage_ref == "object://intent-source" + assert repo.list_metadata_records("asset-intent")[0].confirmed is True + assert repo.list_audit_events(target="asset:asset-intent")[0].operation == "asset.create" + + +def test_asset_registry_lifecycle_policy_denial_fails_closed_and_audits() -> None: + repo = InMemoryAssetRegistryRepository() + service = AssetRegistryService(repo, policy_gateway=DenyLifecyclePolicy()) + context = operation_context() + created = service.create_asset( + "Governed Asset", + Classification(asset_type="document", sensitivity=Sensitivity.CONFIDENTIAL), + context, + asset_id="asset-governed", + ) + + with pytest.raises(AuthorizationError) as exc_info: + service.transition_lifecycle(created.asset.id, LifecycleState.RETIRED, context) + + events = repo.list_audit_events(target="asset:asset-governed") + + assert exc_info.value.details["correlation_id"] == "corr-test" + assert exc_info.value.details["policy_decision"]["effect"] == "fail_closed" + assert [event.outcome.value for event in events] == ["success", "denied"] + assert repo.get_asset("asset-governed").lifecycle == LifecycleState.ACTIVE + + +def test_sqlite_asset_registry_survives_reinstantiation(tmp_path: Path) -> None: + db_path = tmp_path / "registry.sqlite" + repo = SQLiteAssetRegistryRepository(db_path) + service = AssetRegistryService(repo) + context = operation_context() + source_ref = SourceReference(source_system="repo", path="README.md", checksum="sha256:readme") + source = AssetRepresentation.from_content( + "asset-readme", + RepresentationKind.SOURCE, + "text/markdown", + "# Readme\n", + storage_ref="object://readme-source", + ) + created = service.create_asset( + "Readme", + Classification(asset_type="document", sensitivity=Sensitivity.PUBLIC), + context, + asset_id="asset-readme", + source_refs=[source_ref], + representations=[source], + ) + service.add_metadata_record( + created.asset.id, + MetadataRecord("owner", "Platform Knowledge", confirmed=True), + context, + ) + service.request_delete(created.asset.id, context) + + reloaded = SQLiteAssetRegistryRepository(db_path) + asset = reloaded.get_asset("asset-readme") + + assert asset.lifecycle == LifecycleState.DELETE_REQUESTED + assert asset.source_refs[0].path == "README.md" + assert [item.kind for item in reloaded.list_representations(asset_id=asset.id)] == [ + RepresentationKind.SOURCE + ] + assert [item.key for item in reloaded.list_metadata_records(asset.id)] == ["owner"] + assert [version.sequence for version in reloaded.list_versions(asset.id)] == [1, 2, 3] + assert [event.operation for event in reloaded.list_audit_events(target="asset:asset-readme")] == [ + "asset.create", + "asset.metadata.add", + "asset.lifecycle.transition", + ] + + +def test_sqlite_registry_enforces_representation_asset_reference(tmp_path: Path) -> None: + repo = SQLiteAssetRegistryRepository(tmp_path / "registry.sqlite") + representation = AssetRepresentation.from_content( + "missing-asset", + RepresentationKind.NORMALIZED, + "text/plain", + "normalized", + ) + + with pytest.raises(ValidationError, match="unknown asset"): + repo.save_representation(representation) + + +def operation_context() -> OperationContext: + actor = Actor.create( + ActorType.HUMAN, + actor_id="user-test", + display_name="Test User", + groups=["engineering"], + ) + return OperationContext.create(actor, correlation_id="corr-test") + + +class DenyLifecyclePolicy: + def authorize( + self, + context: OperationContext, + action: str, + resource: str, + *, + resource_metadata: dict[str, str] | None = None, + ) -> PolicyDecision: + if action == "asset.lifecycle.transition": + return PolicyDecision.fail_closed( + context.actor.id, + action, + resource, + reason="lifecycle transitions require review", + context={"resource_metadata": resource_metadata or {}}, + ) + return PolicyDecision.allow(context.actor.id, action, resource) + diff --git a/workplans/KONT-WP-0005-asset-registry-governance-state.md b/workplans/KONT-WP-0005-asset-registry-governance-state.md index 6b6280c..50acb75 100644 --- a/workplans/KONT-WP-0005-asset-registry-governance-state.md +++ b/workplans/KONT-WP-0005-asset-registry-governance-state.md @@ -45,6 +45,14 @@ adapter metadata on representations or versions. It must not make Markitect document classes canonical engine entities, and asset identity must remain independent of Markitect snapshot identity. +## Implementation Note + +The first registry slice is recorded in +`docs/asset-registry-implementation.md`. It establishes repository ports, +memory and SQLite adapters, and the asset registry service for create, +metadata, representation, lifecycle, policy, audit, versions, and durable +reload behavior. + ## G5.1 - Implement stable asset identity and source references ```task