diff --git a/README.md b/README.md index 9882e1c..ca4b4a9 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ Start here: - `SCOPE.md` - `docs/knowledge-operations-roadmap.md` - `docs/architecture-blueprint.md` +- `docs/architecture-core-implementation.md` - `docs/stack-decision.md` - `docs/markitect-main-scope-assessment.md` - `docs/markitect-tool-reuse-boundary.md` diff --git a/docs/architecture-core-implementation.md b/docs/architecture-core-implementation.md new file mode 100644 index 0000000..986b246 --- /dev/null +++ b/docs/architecture-core-implementation.md @@ -0,0 +1,93 @@ +# Architecture Core Implementation Note + +Date: 2026-05-05 + +Status: first implementation slice for `KONT-WP-0004`. + +## Purpose + +This note records the initial implementation of the architecture core described +in `docs/architecture-blueprint.md`. It does not replace the older flat +runtime modules yet. Instead, it introduces canonical domain contracts that the +next workplans can build on while keeping existing tests and compatibility +exports stable. + +## Implemented Core Package + +New package: + +```text +src/kontextual_engine/core/ + actors.py + assets.py + audit.py + metadata.py + policy.py + primitives.py + provenance.py + relationships.py +``` + +Implemented primitives: + +- `KnowledgeAsset`, +- `SourceReference`, +- `AssetRepresentation`, +- `AssetVersion`, +- `Classification`, +- `MetadataRecord`, +- `Actor`, +- `OperationContext`, +- `PolicyDecision`, +- `AuditEvent`, +- `ContextEntity`, +- `CoreRelationship`, +- `DerivedArtifactLineage`. + +The package is deterministic, import-light, and has no dependency on HTTP, +SQLite, Markitect, LLM providers, or source-system SDKs. + +## Current Module Mapping + +| Existing module | V0.2 interpretation | Current posture | +| --- | --- | --- | +| `artifacts.py` | Early artifact and collection facade. | Reusable temporarily as compatibility layer; maps toward `KnowledgeAsset`, `AssetRepresentation`, and collection scope. | +| `relationships.py` | In-memory graph helper over artifact relationships. | Reusable as test helper; richer relationship contracts now start in `core.relationships`. | +| `ingestion.py` | First adapter-oriented ingestion facade. | Reusable for plain text and Markitect markdown adapter behavior; needs job model in `KONT-WP-0006`. | +| `query.py` | Early in-memory artifact query helper. | Reusable for tests; governed retrieval moves into `KONT-WP-0007`. | +| `workflows.py` | Early operation run and manifest contracts. | Reusable as a stepping stone; workflow services and run persistence move into `KONT-WP-0008`. | +| `context.py` | Early agent-facing context package helper. | Reusable as a facade; governed context packages move into `KONT-WP-0009`. | +| `storage.py` | In-memory repository for early artifacts. | Reusable for deterministic tests; durable repository begins in `KONT-WP-0005`. | + +## Architectural Decisions Captured + +- Asset identity is not derived from source path, filename, backend, or + representation. +- Source, normalized, and derived representations are separate records. +- Markitect snapshot IDs and adapter provenance belong in representation + metadata, not in engine identity. +- Actor context and correlation IDs are explicit inputs to material operations. +- Ambiguous permission state can be represented as `fail_closed`. +- Audit events carry actor, operation, target, outcome, correlation ID, and + optional policy decision. +- Derived artifact lineage carries source assets, source versions, + transformation run, actor, parameters, policy context, adapter provenance, + and output representation. +- Inferred metadata and confirmed metadata are distinguishable. + +## Test Coverage + +`tests/test_core_architecture.py` covers the first core contracts: + +- stable asset identity across source movement, +- distinct source, normalized, and derived representations, +- explicit actor, policy, and audit context, +- derived output version and lineage explanation, +- inferred versus confirmed metadata records. + +## Next Implementation Boundary + +The next workplan should build on these contracts rather than reusing the old +artifact model directly. The natural next step is `KONT-WP-0005`: asset +registry governance and durable state. + diff --git a/src/kontextual_engine/__init__.py b/src/kontextual_engine/__init__.py index ff32fd1..3f82c88 100644 --- a/src/kontextual_engine/__init__.py +++ b/src/kontextual_engine/__init__.py @@ -12,6 +12,30 @@ from .artifacts import ( content_digest, ) from .context import ContextAssembler, ContextItem, ContextPackage +from .core import ( + Actor, + ActorType, + AssetRepresentation, + AssetVersion, + AuditEvent, + AuditOutcome, + Classification, + ContextEntity, + ContextEntityType, + CoreRelationship, + DerivedArtifactLineage, + KnowledgeAsset, + LifecycleState, + MetadataRecord, + OperationContext, + PolicyDecision, + PolicyEffect, + RelationshipTargetKind, + RepresentationKind, + Sensitivity, + SourceReference, + VersionChangeType, +) from .errors import ( AdapterUnavailableError, Diagnostic, @@ -40,10 +64,21 @@ __all__ = [ "ArtifactMetadata", "ArtifactReference", "ArtifactType", + "Actor", + "ActorType", + "AssetRepresentation", + "AssetVersion", + "AuditEvent", + "AuditOutcome", + "Classification", "Collection", "ContextAssembler", + "ContextEntity", + "ContextEntityType", "ContextItem", "ContextPackage", + "CoreRelationship", + "DerivedArtifactLineage", "Diagnostic", "DuplicateResourceError", "InMemoryKnowledgeRepository", @@ -51,18 +86,29 @@ __all__ = [ "IngestionResult", "IngestionService", "InputBundle", + "KnowledgeAsset", "KontextualError", + "LifecycleState", + "MetadataRecord", "NotFoundError", "OperationRun", "OperationStage", + "OperationContext", + "PolicyDecision", + "PolicyEffect", "QueryEngine", "QueryResult", "Relationship", "RelationshipGraph", + "RelationshipTargetKind", "RelationshipType", + "RepresentationKind", "RunManifest", "RunStatus", + "Sensitivity", + "SourceReference", "ValidationError", + "VersionChangeType", "WorkflowStep", "bundle_digest", "content_digest", diff --git a/src/kontextual_engine/core/__init__.py b/src/kontextual_engine/core/__init__.py new file mode 100644 index 0000000..f00a0b5 --- /dev/null +++ b/src/kontextual_engine/core/__init__.py @@ -0,0 +1,51 @@ +"""Domain core primitives for the knowledge operations architecture.""" + +from .actors import Actor, ActorType, OperationContext +from .assets import AssetRepresentation, KnowledgeAsset, RepresentationKind +from .audit import AuditEvent, AuditOutcome +from .metadata import Classification, LifecycleState, MetadataRecord, Sensitivity +from .policy import PolicyDecision, PolicyEffect +from .primitives import content_digest, mapping_digest, new_id, stable_json_dumps, utc_now +from .provenance import ( + AssetVersion, + DerivedArtifactLineage, + SourceReference, + VersionChangeType, +) +from .relationships import ( + ContextEntity, + ContextEntityType, + CoreRelationship, + RelationshipTargetKind, +) + +__all__ = [ + "Actor", + "ActorType", + "AssetRepresentation", + "AssetVersion", + "AuditEvent", + "AuditOutcome", + "Classification", + "ContextEntity", + "ContextEntityType", + "CoreRelationship", + "DerivedArtifactLineage", + "KnowledgeAsset", + "LifecycleState", + "MetadataRecord", + "OperationContext", + "PolicyDecision", + "PolicyEffect", + "RelationshipTargetKind", + "RepresentationKind", + "Sensitivity", + "SourceReference", + "VersionChangeType", + "content_digest", + "mapping_digest", + "new_id", + "stable_json_dumps", + "utc_now", +] + diff --git a/src/kontextual_engine/core/actors.py b/src/kontextual_engine/core/actors.py new file mode 100644 index 0000000..3f12e9c --- /dev/null +++ b/src/kontextual_engine/core/actors.py @@ -0,0 +1,113 @@ +"""Actor and operation-context primitives.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum +from typing import Any + +from .primitives import compact_dict, new_id + + +class ActorType(str, Enum): + HUMAN = "human" + APPLICATION = "application" + AUTOMATION = "automation" + SERVICE_ACCOUNT = "service_account" + AI_AGENT = "ai_agent" + + +@dataclass(frozen=True) +class Actor: + id: str + actor_type: ActorType + display_name: str | None = None + external_ref: str | None = None + groups: tuple[str, ...] = () + metadata: dict[str, Any] = field(default_factory=dict) + + @classmethod + def create( + cls, + actor_type: ActorType | str, + *, + actor_id: str | None = None, + display_name: str | None = None, + external_ref: str | None = None, + groups: list[str] | tuple[str, ...] | None = None, + metadata: dict[str, Any] | None = None, + ) -> "Actor": + return cls( + id=actor_id or new_id("actor"), + actor_type=ActorType(actor_type), + display_name=display_name, + external_ref=external_ref, + groups=tuple(groups or ()), + metadata=dict(metadata or {}), + ) + + def to_dict(self) -> dict[str, Any]: + return compact_dict( + { + "id": self.id, + "actor_type": self.actor_type.value, + "display_name": self.display_name, + "external_ref": self.external_ref, + "groups": list(self.groups), + "metadata": dict(self.metadata), + } + ) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "Actor": + return cls( + id=data["id"], + actor_type=ActorType(data["actor_type"]), + display_name=data.get("display_name"), + external_ref=data.get("external_ref"), + groups=tuple(data.get("groups", [])), + metadata=dict(data.get("metadata", {})), + ) + + +@dataclass(frozen=True) +class OperationContext: + actor: Actor + correlation_id: str + delegated_actor: Actor | None = None + request_scope: dict[str, Any] = field(default_factory=dict) + policy_scope: dict[str, Any] = field(default_factory=dict) + metadata: dict[str, Any] = field(default_factory=dict) + + @classmethod + def create( + cls, + actor: Actor, + *, + correlation_id: str | None = None, + delegated_actor: Actor | None = None, + request_scope: dict[str, Any] | None = None, + policy_scope: dict[str, Any] | None = None, + metadata: dict[str, Any] | None = None, + ) -> "OperationContext": + return cls( + actor=actor, + delegated_actor=delegated_actor, + correlation_id=correlation_id or new_id("corr"), + request_scope=dict(request_scope or {}), + policy_scope=dict(policy_scope or {}), + metadata=dict(metadata or {}), + ) + + def to_dict(self) -> dict[str, Any]: + return compact_dict( + { + "actor": self.actor.to_dict(), + "delegated_actor": self.delegated_actor.to_dict() if self.delegated_actor else None, + "correlation_id": self.correlation_id, + "request_scope": dict(self.request_scope), + "policy_scope": dict(self.policy_scope), + "metadata": dict(self.metadata), + } + ) + diff --git a/src/kontextual_engine/core/assets.py b/src/kontextual_engine/core/assets.py new file mode 100644 index 0000000..77f2f54 --- /dev/null +++ b/src/kontextual_engine/core/assets.py @@ -0,0 +1,147 @@ +"""Canonical knowledge asset and representation models.""" + +from __future__ import annotations + +from dataclasses import dataclass, field, replace +from enum import Enum +from typing import Any + +from .metadata import Classification, LifecycleState +from .primitives import compact_dict, content_digest, new_id, utc_now +from .provenance import SourceReference + + +class RepresentationKind(str, Enum): + SOURCE = "source" + NORMALIZED = "normalized" + DERIVED = "derived" + + +@dataclass(frozen=True) +class AssetRepresentation: + asset_id: str + kind: RepresentationKind + media_type: str + digest: str + size_bytes: int + storage_ref: str | None = None + producer: str | None = None + source_ref_id: str | None = None + metadata: dict[str, Any] = field(default_factory=dict) + representation_id: str = field(default_factory=lambda: new_id("repr")) + created_at: str = field(default_factory=lambda: utc_now().isoformat()) + + @classmethod + def from_content( + cls, + asset_id: str, + kind: RepresentationKind | str, + media_type: str, + content: str | bytes, + *, + storage_ref: str | None = None, + producer: str | None = None, + source_ref_id: str | None = None, + metadata: dict[str, Any] | None = None, + representation_id: str | None = None, + ) -> "AssetRepresentation": + data = content.encode("utf-8") if isinstance(content, str) else content + return cls( + representation_id=representation_id or new_id("repr"), + asset_id=asset_id, + kind=RepresentationKind(kind), + media_type=media_type, + digest=content_digest(data), + size_bytes=len(data), + storage_ref=storage_ref, + producer=producer, + source_ref_id=source_ref_id, + metadata=dict(metadata or {}), + ) + + def to_dict(self) -> dict[str, Any]: + return compact_dict( + { + "representation_id": self.representation_id, + "asset_id": self.asset_id, + "kind": self.kind.value, + "media_type": self.media_type, + "digest": self.digest, + "size_bytes": self.size_bytes, + "storage_ref": self.storage_ref, + "producer": self.producer, + "source_ref_id": self.source_ref_id, + "metadata": dict(self.metadata), + "created_at": self.created_at, + } + ) + + +@dataclass(frozen=True) +class KnowledgeAsset: + id: str + title: str + classification: Classification + source_refs: tuple[SourceReference, ...] = () + aliases: tuple[str, ...] = () + current_version_id: str | None = None + lifecycle: LifecycleState = LifecycleState.ACTIVE + metadata: dict[str, Any] = field(default_factory=dict) + created_at: str = field(default_factory=lambda: utc_now().isoformat()) + updated_at: str = field(default_factory=lambda: utc_now().isoformat()) + + @classmethod + def create( + cls, + title: str, + classification: Classification, + *, + asset_id: str | None = None, + source_refs: list[SourceReference] | tuple[SourceReference, ...] | None = None, + aliases: list[str] | tuple[str, ...] | None = None, + metadata: dict[str, Any] | None = None, + ) -> "KnowledgeAsset": + return cls( + id=asset_id or new_id("asset"), + title=title, + classification=classification, + source_refs=tuple(source_refs or ()), + aliases=tuple(aliases or ()), + metadata=dict(metadata or {}), + lifecycle=classification.lifecycle, + ) + + def with_source_reference(self, source_ref: SourceReference) -> "KnowledgeAsset": + return replace(self, source_refs=self.source_refs + (source_ref,), updated_at=utc_now().isoformat()) + + def with_alias(self, alias: str) -> "KnowledgeAsset": + if alias in self.aliases: + return self + return replace(self, aliases=self.aliases + (alias,), updated_at=utc_now().isoformat()) + + def transition_lifecycle(self, lifecycle: LifecycleState | str) -> "KnowledgeAsset": + lifecycle_state = LifecycleState(lifecycle) + classification = replace(self.classification, lifecycle=lifecycle_state) + return replace( + self, + lifecycle=lifecycle_state, + classification=classification, + updated_at=utc_now().isoformat(), + ) + + def to_dict(self) -> dict[str, Any]: + return compact_dict( + { + "id": self.id, + "title": self.title, + "classification": self.classification.to_dict(), + "source_refs": [source_ref.to_dict() for source_ref in self.source_refs], + "aliases": list(self.aliases), + "current_version_id": self.current_version_id, + "lifecycle": self.lifecycle.value, + "metadata": dict(self.metadata), + "created_at": self.created_at, + "updated_at": self.updated_at, + } + ) + diff --git a/src/kontextual_engine/core/audit.py b/src/kontextual_engine/core/audit.py new file mode 100644 index 0000000..1865051 --- /dev/null +++ b/src/kontextual_engine/core/audit.py @@ -0,0 +1,72 @@ +"""Audit records for material engine operations.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum +from typing import Any + +from .actors import OperationContext +from .policy import PolicyDecision +from .primitives import compact_dict, new_id, utc_now + + +class AuditOutcome(str, Enum): + SUCCESS = "success" + DENIED = "denied" + FAILED = "failed" + PARTIAL = "partial" + REVIEW_REQUIRED = "review_required" + DRY_RUN = "dry_run" + + +@dataclass(frozen=True) +class AuditEvent: + operation: str + target: str + outcome: AuditOutcome + actor_id: str + correlation_id: str + event_id: str = field(default_factory=lambda: new_id("audit")) + policy_decision: PolicyDecision | None = None + details: dict[str, Any] = field(default_factory=dict) + occurred_at: str = field(default_factory=lambda: utc_now().isoformat()) + + @classmethod + def from_context( + cls, + operation: str, + target: str, + outcome: AuditOutcome | str, + context: OperationContext, + *, + policy_decision: PolicyDecision | None = None, + details: dict[str, Any] | None = None, + event_id: str | None = None, + ) -> "AuditEvent": + return cls( + event_id=event_id or new_id("audit"), + operation=operation, + target=target, + outcome=AuditOutcome(outcome), + actor_id=context.actor.id, + correlation_id=context.correlation_id, + policy_decision=policy_decision, + details=dict(details or {}), + ) + + def to_dict(self) -> dict[str, Any]: + return compact_dict( + { + "event_id": self.event_id, + "operation": self.operation, + "target": self.target, + "outcome": self.outcome.value, + "actor_id": self.actor_id, + "correlation_id": self.correlation_id, + "policy_decision": self.policy_decision.to_dict() if self.policy_decision else None, + "details": dict(self.details), + "occurred_at": self.occurred_at, + } + ) + diff --git a/src/kontextual_engine/core/metadata.py b/src/kontextual_engine/core/metadata.py new file mode 100644 index 0000000..c3707a3 --- /dev/null +++ b/src/kontextual_engine/core/metadata.py @@ -0,0 +1,74 @@ +"""Metadata, classification, and lifecycle primitives.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum +from typing import Any + +from .primitives import compact_dict, new_id, utc_now + + +class LifecycleState(str, Enum): + DRAFT = "draft" + ACTIVE = "active" + QUARANTINED = "quarantined" + RETIRED = "retired" + DELETE_REQUESTED = "delete_requested" + DELETED = "deleted" + + +class Sensitivity(str, Enum): + PUBLIC = "public" + INTERNAL = "internal" + CONFIDENTIAL = "confidential" + RESTRICTED = "restricted" + + +@dataclass(frozen=True) +class Classification: + asset_type: str + sensitivity: Sensitivity = Sensitivity.INTERNAL + lifecycle: LifecycleState = LifecycleState.ACTIVE + topics: tuple[str, ...] = () + owner: str | None = None + review_state: str | None = None + metadata: dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> dict[str, Any]: + return compact_dict( + { + "asset_type": self.asset_type, + "sensitivity": self.sensitivity.value, + "lifecycle": self.lifecycle.value, + "topics": list(self.topics), + "owner": self.owner, + "review_state": self.review_state, + "metadata": dict(self.metadata), + } + ) + + +@dataclass(frozen=True) +class MetadataRecord: + key: str + value: Any + provenance: dict[str, Any] = field(default_factory=dict) + confidence: float | None = None + confirmed: bool = False + record_id: str = field(default_factory=lambda: new_id("meta")) + created_at: str = field(default_factory=lambda: utc_now().isoformat()) + + def to_dict(self) -> dict[str, Any]: + return compact_dict( + { + "record_id": self.record_id, + "key": self.key, + "value": self.value, + "provenance": dict(self.provenance), + "confidence": self.confidence, + "confirmed": self.confirmed, + "created_at": self.created_at, + } + ) + diff --git a/src/kontextual_engine/core/policy.py b/src/kontextual_engine/core/policy.py new file mode 100644 index 0000000..26d2001 --- /dev/null +++ b/src/kontextual_engine/core/policy.py @@ -0,0 +1,79 @@ +"""Policy decision primitives for permission-aware operations.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum +from typing import Any + +from .primitives import compact_dict, new_id, utc_now + + +class PolicyEffect(str, Enum): + ALLOW = "allow" + DENY = "deny" + REDACT = "redact" + REQUIRE_REVIEW = "require_review" + DRY_RUN_ONLY = "dry_run_only" + FAIL_CLOSED = "fail_closed" + + +@dataclass(frozen=True) +class PolicyDecision: + effect: PolicyEffect + subject_id: str + action: str + resource: str + reason: str = "" + decision_id: str = field(default_factory=lambda: new_id("policy")) + obligations: dict[str, Any] = field(default_factory=dict) + context: dict[str, Any] = field(default_factory=dict) + decided_at: str = field(default_factory=lambda: utc_now().isoformat()) + + @classmethod + def allow(cls, subject_id: str, action: str, resource: str, **kwargs: Any) -> "PolicyDecision": + return cls(PolicyEffect.ALLOW, subject_id, action, resource, **kwargs) + + @classmethod + def deny( + cls, + subject_id: str, + action: str, + resource: str, + *, + reason: str, + **kwargs: Any, + ) -> "PolicyDecision": + return cls(PolicyEffect.DENY, subject_id, action, resource, reason=reason, **kwargs) + + @classmethod + def fail_closed( + cls, + subject_id: str, + action: str, + resource: str, + *, + reason: str = "Permission context is missing or ambiguous", + **kwargs: Any, + ) -> "PolicyDecision": + return cls(PolicyEffect.FAIL_CLOSED, subject_id, action, resource, reason=reason, **kwargs) + + @property + def allowed(self) -> bool: + return self.effect == PolicyEffect.ALLOW + + def to_dict(self) -> dict[str, Any]: + return compact_dict( + { + "decision_id": self.decision_id, + "effect": self.effect.value, + "subject_id": self.subject_id, + "action": self.action, + "resource": self.resource, + "reason": self.reason, + "obligations": dict(self.obligations), + "context": dict(self.context), + "decided_at": self.decided_at, + } + ) + diff --git a/src/kontextual_engine/core/primitives.py b/src/kontextual_engine/core/primitives.py new file mode 100644 index 0000000..b408c93 --- /dev/null +++ b/src/kontextual_engine/core/primitives.py @@ -0,0 +1,43 @@ +"""Small deterministic primitives used by the domain core.""" + +from __future__ import annotations + +import hashlib +import json +import uuid +from datetime import datetime, timezone +from typing import Any + + +def utc_now() -> datetime: + return datetime.now(timezone.utc) + + +def new_id(prefix: str) -> str: + return f"{prefix}_{uuid.uuid4().hex}" + + +def stable_json_dumps(value: Any) -> str: + return json.dumps(value, sort_keys=True, separators=(",", ":"), default=str) + + +def content_digest(content: str | bytes) -> str: + data = content.encode("utf-8") if isinstance(content, str) else content + return "sha256:" + hashlib.sha256(data).hexdigest() + + +def mapping_digest(value: Any) -> str: + return content_digest(stable_json_dumps(value)) + + +def compact_dict(data: dict[str, Any]) -> dict[str, Any]: + return {key: value for key, value in data.items() if value not in (None, {}, [])} + + +def datetime_to_str(value: datetime | None) -> str | None: + return value.isoformat() if value else None + + +def datetime_from_str(value: str | None) -> datetime | None: + return datetime.fromisoformat(value) if value else None + diff --git a/src/kontextual_engine/core/provenance.py b/src/kontextual_engine/core/provenance.py new file mode 100644 index 0000000..80dfb34 --- /dev/null +++ b/src/kontextual_engine/core/provenance.py @@ -0,0 +1,132 @@ +"""Source, version, and lineage primitives.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum +from typing import Any + +from .primitives import compact_dict, mapping_digest, new_id, utc_now + + +@dataclass(frozen=True) +class SourceReference: + source_system: str + path: str | None = None + uri: str | None = None + external_id: str | None = None + checksum: str | None = None + connector_ref: str | None = None + metadata: dict[str, Any] = field(default_factory=dict) + id: str = field(default_factory=lambda: new_id("src")) + + @property + def identity_key(self) -> str: + return mapping_digest( + { + "source_system": self.source_system, + "path": self.path, + "uri": self.uri, + "external_id": self.external_id, + "checksum": self.checksum, + "connector_ref": self.connector_ref, + } + ) + + def to_dict(self) -> dict[str, Any]: + return compact_dict( + { + "id": self.id, + "source_system": self.source_system, + "path": self.path, + "uri": self.uri, + "external_id": self.external_id, + "checksum": self.checksum, + "connector_ref": self.connector_ref, + "identity_key": self.identity_key, + "metadata": dict(self.metadata), + } + ) + + +class VersionChangeType(str, Enum): + CREATED = "created" + CONTENT_CHANGED = "content_changed" + METADATA_CHANGED = "metadata_changed" + RELATIONSHIP_CHANGED = "relationship_changed" + LIFECYCLE_CHANGED = "lifecycle_changed" + DERIVED_OUTPUT = "derived_output" + RESTORED = "restored" + SUPERSEDED = "superseded" + + +@dataclass(frozen=True) +class AssetVersion: + asset_id: str + sequence: int + change_type: VersionChangeType + representation_ids: tuple[str, ...] = () + actor_id: str | None = None + operation_id: str | None = None + parent_version_id: str | None = None + metadata_delta: dict[str, Any] = field(default_factory=dict) + relationship_delta: dict[str, Any] = field(default_factory=dict) + lifecycle: str | None = None + version_id: str = field(default_factory=lambda: new_id("ver")) + created_at: str = field(default_factory=lambda: utc_now().isoformat()) + + def to_dict(self) -> dict[str, Any]: + return compact_dict( + { + "version_id": self.version_id, + "asset_id": self.asset_id, + "sequence": self.sequence, + "change_type": self.change_type.value, + "representation_ids": list(self.representation_ids), + "actor_id": self.actor_id, + "operation_id": self.operation_id, + "parent_version_id": self.parent_version_id, + "metadata_delta": dict(self.metadata_delta), + "relationship_delta": dict(self.relationship_delta), + "lifecycle": self.lifecycle, + "created_at": self.created_at, + } + ) + + +@dataclass(frozen=True) +class DerivedArtifactLineage: + source_asset_ids: tuple[str, ...] + source_version_ids: tuple[str, ...] + transformation_run_id: str + output_asset_id: str + output_representation_id: str + actor_id: str + parameters: dict[str, Any] = field(default_factory=dict) + policy_context: dict[str, Any] = field(default_factory=dict) + adapter_provenance: dict[str, Any] = field(default_factory=dict) + lineage_id: str = field(default_factory=lambda: new_id("lineage")) + + @property + def lineage_hash(self) -> str: + return mapping_digest(self.to_dict(include_hash=False)) + + def to_dict(self, *, include_hash: bool = True) -> dict[str, Any]: + data = compact_dict( + { + "lineage_id": self.lineage_id, + "source_asset_ids": list(self.source_asset_ids), + "source_version_ids": list(self.source_version_ids), + "transformation_run_id": self.transformation_run_id, + "output_asset_id": self.output_asset_id, + "output_representation_id": self.output_representation_id, + "actor_id": self.actor_id, + "parameters": dict(self.parameters), + "policy_context": dict(self.policy_context), + "adapter_provenance": dict(self.adapter_provenance), + } + ) + if include_hash: + data["lineage_hash"] = self.lineage_hash + return data + diff --git a/src/kontextual_engine/core/relationships.py b/src/kontextual_engine/core/relationships.py new file mode 100644 index 0000000..78f74d1 --- /dev/null +++ b/src/kontextual_engine/core/relationships.py @@ -0,0 +1,82 @@ +"""Context entity and typed relationship primitives.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum +from typing import Any + +from .primitives import compact_dict, new_id, utc_now + + +class ContextEntityType(str, Enum): + PERSON = "person" + TEAM = "team" + PROJECT = "project" + CASE = "case" + CUSTOMER = "customer" + PRODUCT = "product" + PROCESS = "process" + SOURCE_SYSTEM = "source_system" + TOPIC = "topic" + BUSINESS_OBJECT = "business_object" + + +@dataclass(frozen=True) +class ContextEntity: + entity_type: ContextEntityType + name: str + external_ref: str | None = None + metadata: dict[str, Any] = field(default_factory=dict) + entity_id: str = field(default_factory=lambda: new_id("entity")) + + def to_dict(self) -> dict[str, Any]: + return compact_dict( + { + "entity_id": self.entity_id, + "entity_type": self.entity_type.value, + "name": self.name, + "external_ref": self.external_ref, + "metadata": dict(self.metadata), + } + ) + + +class RelationshipTargetKind(str, Enum): + ASSET = "asset" + CONTEXT_ENTITY = "context_entity" + + +@dataclass(frozen=True) +class CoreRelationship: + source_id: str + target_id: str + predicate: str + target_kind: RelationshipTargetKind = RelationshipTargetKind.ASSET + direction: str = "outbound" + confidence: float | None = None + valid_from: str | None = None + valid_to: str | None = None + actor_id: str | None = None + provenance: dict[str, Any] = field(default_factory=dict) + relationship_id: str = field(default_factory=lambda: new_id("rel")) + created_at: str = field(default_factory=lambda: utc_now().isoformat()) + + def to_dict(self) -> dict[str, Any]: + return compact_dict( + { + "relationship_id": self.relationship_id, + "source_id": self.source_id, + "target_id": self.target_id, + "predicate": self.predicate, + "target_kind": self.target_kind.value, + "direction": self.direction, + "confidence": self.confidence, + "valid_from": self.valid_from, + "valid_to": self.valid_to, + "actor_id": self.actor_id, + "provenance": dict(self.provenance), + "created_at": self.created_at, + } + ) + diff --git a/tests/test_core_architecture.py b/tests/test_core_architecture.py new file mode 100644 index 0000000..b7293c8 --- /dev/null +++ b/tests/test_core_architecture.py @@ -0,0 +1,178 @@ +from kontextual_engine.core import ( + Actor, + ActorType, + AssetRepresentation, + AssetVersion, + AuditEvent, + AuditOutcome, + Classification, + DerivedArtifactLineage, + KnowledgeAsset, + LifecycleState, + MetadataRecord, + OperationContext, + PolicyDecision, + PolicyEffect, + RepresentationKind, + Sensitivity, + SourceReference, + VersionChangeType, +) + + +def test_knowledge_asset_identity_is_independent_of_source_and_representation() -> None: + source = SourceReference(source_system="repo", path="docs/intent.md", checksum="sha256:source") + classification = Classification( + asset_type="document", + sensitivity=Sensitivity.INTERNAL, + lifecycle=LifecycleState.ACTIVE, + owner="Platform Knowledge", + ) + asset = KnowledgeAsset.create( + "Intent", + classification, + asset_id="asset-intent", + source_refs=[source], + aliases=["INTENT.md"], + ) + + moved = asset.with_source_reference( + SourceReference(source_system="repo", path="wiki/INTENT.md", checksum="sha256:source") + ) + normalized = AssetRepresentation.from_content( + asset.id, + RepresentationKind.NORMALIZED, + "text/markdown+normalized", + "# Intent\n\nNormalized body.", + producer="markitect-tool", + ) + + assert moved.id == asset.id + assert moved.source_refs[0].path == "docs/intent.md" + assert moved.source_refs[1].path == "wiki/INTENT.md" + assert normalized.asset_id == asset.id + assert normalized.kind == RepresentationKind.NORMALIZED + + +def test_representations_keep_source_normalized_and_derived_forms_distinct() -> None: + source = AssetRepresentation.from_content( + "asset-1", + RepresentationKind.SOURCE, + "text/markdown", + "# Source\n", + storage_ref="object://source", + ) + normalized = AssetRepresentation.from_content( + "asset-1", + RepresentationKind.NORMALIZED, + "application/vnd.kontextual.normalized+json", + '{"text":"Source"}', + producer="markitect-tool", + metadata={"adapter_snapshot_id": "snapshot:123"}, + ) + derived = AssetRepresentation.from_content( + "asset-2", + RepresentationKind.DERIVED, + "text/markdown", + "# Summary\n", + producer="summarize", + ) + + assert source.kind == RepresentationKind.SOURCE + assert normalized.kind == RepresentationKind.NORMALIZED + assert derived.kind == RepresentationKind.DERIVED + assert source.digest != normalized.digest + assert normalized.metadata["adapter_snapshot_id"] == "snapshot:123" + + +def test_actor_policy_and_audit_context_are_explicit() -> None: + actor = Actor.create( + ActorType.AI_AGENT, + actor_id="agent-codex", + display_name="Codex", + groups=["engineering"], + ) + context = OperationContext.create( + actor, + correlation_id="corr-123", + policy_scope={"sensitivity": "internal"}, + ) + decision = PolicyDecision.fail_closed( + actor.id, + "retrieve", + "asset:secret", + reason="missing permission envelope", + ) + event = AuditEvent.from_context( + "retrieve", + "asset:secret", + AuditOutcome.DENIED, + context, + policy_decision=decision, + ) + + assert decision.effect == PolicyEffect.FAIL_CLOSED + assert decision.allowed is False + assert event.actor_id == "agent-codex" + assert event.correlation_id == "corr-123" + assert event.to_dict()["policy_decision"]["reason"] == "missing permission envelope" + + +def test_versions_and_lineage_explain_derived_artifacts() -> None: + actor = Actor.create(ActorType.HUMAN, actor_id="user-1", display_name="User") + source_version = AssetVersion( + asset_id="asset-source", + sequence=1, + change_type=VersionChangeType.CREATED, + representation_ids=("repr-source",), + actor_id=actor.id, + ) + derived_version = AssetVersion( + asset_id="asset-derived", + sequence=1, + change_type=VersionChangeType.DERIVED_OUTPUT, + representation_ids=("repr-derived",), + actor_id=actor.id, + operation_id="run-summary", + parent_version_id=source_version.version_id, + ) + lineage = DerivedArtifactLineage( + source_asset_ids=("asset-source",), + source_version_ids=(source_version.version_id,), + transformation_run_id="run-summary", + output_asset_id="asset-derived", + output_representation_id="repr-derived", + actor_id=actor.id, + parameters={"style": "brief"}, + policy_context={"effect": "allow"}, + adapter_provenance={"operation": "summarize"}, + ) + + data = lineage.to_dict() + + assert derived_version.parent_version_id == source_version.version_id + assert data["source_asset_ids"] == ["asset-source"] + assert data["source_version_ids"] == [source_version.version_id] + assert data["parameters"] == {"style": "brief"} + assert data["policy_context"] == {"effect": "allow"} + assert data["lineage_hash"].startswith("sha256:") + + +def test_metadata_records_distinguish_inferred_and_confirmed_values() -> None: + inferred = MetadataRecord( + "topic", + "architecture", + provenance={"producer": "classifier"}, + confidence=0.74, + confirmed=False, + ) + confirmed = MetadataRecord( + "owner", + "Platform Knowledge", + provenance={"producer": "human"}, + confirmed=True, + ) + + assert inferred.to_dict()["confidence"] == 0.74 + assert inferred.to_dict()["confirmed"] is False + assert confirmed.to_dict()["confirmed"] is True diff --git a/workplans/KONT-WP-0004-knowledge-operations-architecture.md b/workplans/KONT-WP-0004-knowledge-operations-architecture.md index adc3d10..186daef 100644 --- a/workplans/KONT-WP-0004-knowledge-operations-architecture.md +++ b/workplans/KONT-WP-0004-knowledge-operations-architecture.md @@ -36,6 +36,8 @@ workflow state, exportability, and agent-safe operation from the start. - `docs/markitect-tool-reuse-boundary.md` and `docs/markitect-tool-integration-usecases.md` as the explicit boundary between markdown syntax tooling and the engine runtime. +- `docs/architecture-core-implementation.md` as the first code-backed domain + core implementation note. - Architecture decision notes for the P0 capability baseline. - Traceability from PRD/FRS V0.2 requirements to implementation workplans. - Revised implementation sequence for `KONT-WP-0005` through `KONT-WP-0010`.