From dbe93be1a9e7b08228625bade0bf653f149ccebb Mon Sep 17 00:00:00 2001 From: tegwick Date: Wed, 6 May 2026 04:03:50 +0200 Subject: [PATCH] Markitect schema-validation integration use case and fixture for Markdown proxy documents --- docs/architecture-blueprint.md | 5 + docs/asset-registry-implementation.md | 10 +- docs/markitect-tool-integration-usecases.md | 29 +++++ docs/markitect-tool-reuse-boundary.md | 11 +- .../schemas/adr-proxy.schema.md | 38 ++++++ src/kontextual_engine/__init__.py | 2 + .../adapters/memory/asset_registry.py | 40 ++++++ .../adapters/sqlite/asset_registry.py | 83 +++++++++++++ src/kontextual_engine/core/__init__.py | 2 + src/kontextual_engine/core/metadata.py | 57 +++++++++ src/kontextual_engine/ports/repositories.py | 12 ++ .../services/asset_service.py | 72 ++++++++++- tests/test_asset_registry.py | 116 ++++++++++++++++++ tests/test_core_architecture.py | 23 ++++ tests/test_markitect_tool_contract.py | 14 +++ ...WP-0005-asset-registry-governance-state.md | 16 ++- 16 files changed, 518 insertions(+), 12 deletions(-) create mode 100644 examples/markitect-tool-contract/schemas/adr-proxy.schema.md diff --git a/docs/architecture-blueprint.md b/docs/architecture-blueprint.md index 18d0abc..0da6492 100644 --- a/docs/architecture-blueprint.md +++ b/docs/architecture-blueprint.md @@ -206,6 +206,11 @@ Adapter rules: checks, and context-package interoperability. Engine domain code must not import it directly; adapter code should persist serializable Markitect outputs as adapter provenance or representation metadata. +- Markdown proxy documents are allowed as adapter projections for managed + assets. They can make every asset inspectable and contract-checkable through + Markitect where useful, but they are not the canonical engine identity or + storage model. The canonical layer remains asset, representation, metadata, + lifecycle, policy, lineage, and audit state. - `llm-connect` or equivalent is an adapter for LLM providers. - `phase-memory` is an adjacent memory runtime; this engine may exchange opaque memory references or context packages but should not implement memory phases. diff --git a/docs/asset-registry-implementation.md b/docs/asset-registry-implementation.md index 80d16c9..df903f2 100644 --- a/docs/asset-registry-implementation.md +++ b/docs/asset-registry-implementation.md @@ -35,6 +35,8 @@ and SQLite repositories are adapters behind those ports. - `MetadataRecord` persistence with inferred/confirmed semantics preserved. - Custom metadata schema primitives with structured validation issues. - Metadata schema validation before asset create and metadata update writes. +- Durable metadata schema registry and assignment rules for policy-selected + validation. - Actor and `OperationContext` required for material mutations. - Policy gateway authorization before asset mutations. - Fail-closed policy denial through `AuthorizationError`. @@ -59,6 +61,8 @@ and SQLite repositories are adapters behind those ports. - `assets` - `representations` - `metadata_records` +- `metadata_schemas` +- `metadata_schema_assignments` - `context_entities` - `core_relationships` - `asset_versions` @@ -72,7 +76,6 @@ idempotency key. ## Not Yet Implemented -- Schema registry persistence and policy-assigned schema selection. - Standard metadata filtering beyond lifecycle and asset type. - Policy assignment storage and enterprise policy adapters. - Conflict detection beyond version-sequence uniqueness. @@ -90,9 +93,10 @@ These remain in scope for later `KONT-WP-0005` tasks or adjacent workplans. - lifecycle denial with fail-closed policy and denied audit event, - SQLite reload preserving asset lifecycle, representation, metadata, versions, and audit history, -- SQLite referential integrity for representation asset references. +- SQLite referential integrity for representation asset references, - idempotent asset creation and conflicting idempotency-key reuse, - relationship creation with source-asset versioning and audit, - SQLite reload preserving context entities, relationships, and idempotency records, -- custom metadata schema validation before registry writes. +- custom metadata schema validation before registry writes, +- persistent metadata schema registry and assignment reload behavior. diff --git a/docs/markitect-tool-integration-usecases.md b/docs/markitect-tool-integration-usecases.md index bca54ed..b0588de 100644 --- a/docs/markitect-tool-integration-usecases.md +++ b/docs/markitect-tool-integration-usecases.md @@ -218,6 +218,34 @@ Engine expectation: - The engine owns workflow templates, run state, retries, review gates, exceptions, audit, and derived artifacts. +## Use Case 7: Markdown Proxy Schema Validation + +Intent: validate Markdown source or proxy documents through Markitect document +schemas instead of adding a second Markdown schema validator to the engine. + +Expected Markitect APIs: + +- `load_schema_file(...)` +- `validate_schema(...)` +- `validate_document(...)` +- `validate_markdown_file(...)` + +Example: + +```python +from markitect_tool import validate_markdown_file + +result = validate_markdown_file("asset-proxy.md", "asset-proxy.schema.md") +``` + +Engine expectation: + +- Markdown proxy documents are adapter representations of governed assets. +- Markitect owns Markdown document schema validation for those proxies. +- Engine metadata schema validation remains registry-owned because it governs + asset metadata records, confirmation state, policy assignment, write + rejection, and audit behavior. + ## Integration Test Matrix | Test area | Boundary protected | @@ -228,6 +256,7 @@ Engine expectation: | Snapshot identity | Engine stores Markitect snapshot metadata without owning the algorithm. | | Context package policy filtering | Agent context can reuse Markitect packages and local label policy. | | Document contracts | Markdown validation can call Markitect contracts without moving contract semantics into the engine. | +| Markdown document schemas | Markdown source/proxy validation uses Markitect schema APIs instead of duplicating them. | | Capacity sentinels | Larger generated examples expose likely parser, selector, include, context-package, and snapshot bottlenecks. | These tests are intentionally small but example-backed. They are not a diff --git a/docs/markitect-tool-reuse-boundary.md b/docs/markitect-tool-reuse-boundary.md index 00e6579..3bccff0 100644 --- a/docs/markitect-tool-reuse-boundary.md +++ b/docs/markitect-tool-reuse-boundary.md @@ -21,6 +21,13 @@ state should persist serializable envelopes, source references, digests, lineage, policy decisions, and audit events rather than storing Markitect runtime objects as canonical engine entities. +Markdown proxy documents are a supported adapter pattern. The engine may create +or store Markdown representations that proxy non-Markdown assets so Markitect +selectors, contracts, document schemas, functions, and workflows can operate on +them. Those proxies are representations of governed assets, not replacements +for engine-owned asset identity, metadata, lifecycle, policy, lineage, or audit +state. + Required integration behavior is captured in `docs/markitect-tool-integration-usecases.md` and exercised by `tests/test_markitect_tool_contract.py`. These tests are allowed to skip when @@ -35,6 +42,7 @@ stability checks for the boundary when the `markdown` extra is installed. | Document-level selectors and extraction | `markitect_tool.query`, `docs/query-extraction.md` | Use for markdown source extraction and context package creation. Engine query should operate over persisted artifacts and relationships. | | Deterministic transforms, composition, and includes | `markitect_tool.ops.engine`, `docs/transform-compose-include.md` | Treat as external operations invoked by workflows. Store operation provenance and derived artifacts in the engine. | | Contract checks, runtime context, forms, and assessments | `markitect_tool.contract.*`, `markitect_tool.runtime.*`, `docs/runtime-context-forms-assessments.md` | Use as validation/assessment step adapters. Engine owns run state and audit trail. | +| Markdown document schema validation | `markitect_tool.schema.*` | Use for Markdown document/proxy validation. Engine-owned asset metadata validation stays in the registry layer. | | Backend manifests, local snapshots, FTS, and query adapters | `markitect_tool.backend.*`, `docs/backend-fabric.md` | Reuse snapshot identity and local index concepts. Engine storage remains separate and cross-format. | | Agent working memory context packages | `markitect_tool.memory.engine`, `docs/agent-working-memory.md` | Reuse as a portable context-package format for markdown-backed context. Engine should provide durable context registries across formats. | | Workflow definition syntax and markdown-centered step kinds | `markitect_tool.workflow.*`, `docs/workflow-definition-standard.md` | Reuse where workflows consume markdown inputs. Engine workflows should generalize to artifact collections, external tools, and service operations. | @@ -44,7 +52,8 @@ stability checks for the boundary when the `markdown` extra is installed. ## Adapter Ownership Rules - Markdown ingestion adapters may call `parse_markdown`, `parse_markdown_file`, - `query_document`, `extract_document`, and `snapshot_identity_for_file`. + `query_document`, `extract_document`, `validate_document`, + `validate_markdown_file`, and `snapshot_identity_for_file`. - Markdown transformation adapters may call `transform_markdown`, `compose_files`, `resolve_includes`, Markitect contract checks, document functions, templates, and workflow helpers. diff --git a/examples/markitect-tool-contract/schemas/adr-proxy.schema.md b/examples/markitect-tool-contract/schemas/adr-proxy.schema.md new file mode 100644 index 0000000..f97e76f --- /dev/null +++ b/examples/markitect-tool-contract/schemas/adr-proxy.schema.md @@ -0,0 +1,38 @@ +--- +schema-id: "kontextual-engine.markdown-proxy.adr.v1" +version: "1.0.0" +status: "example" +--- + +# ADR Proxy Document Schema + +```json +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "ADR Proxy Document", + "type": "object", + "required": ["frontmatter", "headings"], + "properties": { + "frontmatter": { + "type": "object", + "required": ["document_type", "status", "owner"], + "properties": { + "document_type": {"const": "adr"}, + "status": {"enum": ["proposed", "accepted", "deprecated", "superseded"]}, + "owner": {"type": "string", "minLength": 1} + } + }, + "headings": { + "type": "array", + "minItems": 3, + "contains": { + "type": "object", + "required": ["text"], + "properties": { + "text": {"const": "Decision"} + } + } + } + } +} +``` diff --git a/src/kontextual_engine/__init__.py b/src/kontextual_engine/__init__.py index 7b7f60f..24d31e3 100644 --- a/src/kontextual_engine/__init__.py +++ b/src/kontextual_engine/__init__.py @@ -39,6 +39,7 @@ from .core import ( MetadataFieldDefinition, MetadataRecord, MetadataSchema, + MetadataSchemaAssignment, MetadataValidationIssue, MetadataValueType, NormalizedDocument, @@ -142,6 +143,7 @@ __all__ = [ "MetadataFieldDefinition", "MetadataRecord", "MetadataSchema", + "MetadataSchemaAssignment", "MetadataValidationIssue", "MetadataValueType", "NormalizedDocument", diff --git a/src/kontextual_engine/adapters/memory/asset_registry.py b/src/kontextual_engine/adapters/memory/asset_registry.py index b49a43a..e63a182 100644 --- a/src/kontextual_engine/adapters/memory/asset_registry.py +++ b/src/kontextual_engine/adapters/memory/asset_registry.py @@ -18,6 +18,8 @@ from kontextual_engine.core import ( KnowledgeAsset, LifecycleState, MetadataRecord, + MetadataSchema, + MetadataSchemaAssignment, RepresentationKind, ) from kontextual_engine.errors import NotFoundError, ValidationError @@ -29,6 +31,8 @@ class InMemoryAssetRegistryRepository: assets: dict[str, KnowledgeAsset] = field(default_factory=dict) representations: dict[str, AssetRepresentation] = field(default_factory=dict) metadata_records: dict[str, list[MetadataRecord]] = field(default_factory=dict) + metadata_schemas: dict[str, MetadataSchema] = field(default_factory=dict) + metadata_schema_assignments: dict[str, MetadataSchemaAssignment] = field(default_factory=dict) context_entities: dict[str, ContextEntity] = field(default_factory=dict) relationships: dict[str, CoreRelationship] = field(default_factory=dict) versions: dict[str, list[AssetVersion]] = field(default_factory=dict) @@ -105,6 +109,42 @@ class InMemoryAssetRegistryRepository: self.get_asset(asset_id) return list(self.metadata_records.get(asset_id, [])) + def save_metadata_schema(self, schema: MetadataSchema) -> MetadataSchema: + self.metadata_schemas[schema.schema_id] = schema + return schema + + def get_metadata_schema(self, schema_id: str) -> MetadataSchema: + try: + return self.metadata_schemas[schema_id] + except KeyError as exc: + raise NotFoundError("Metadata schema not found", details={"schema_id": schema_id}) from exc + + def list_metadata_schemas(self) -> list[MetadataSchema]: + return sorted(self.metadata_schemas.values(), key=lambda schema: (schema.name, schema.schema_id)) + + def save_metadata_schema_assignment( + self, + assignment: MetadataSchemaAssignment, + ) -> MetadataSchemaAssignment: + self.get_metadata_schema(assignment.schema_id) + self.metadata_schema_assignments[assignment.assignment_id] = assignment + return assignment + + def get_metadata_schema_assignment(self, assignment_id: str) -> MetadataSchemaAssignment: + try: + return self.metadata_schema_assignments[assignment_id] + except KeyError as exc: + raise NotFoundError( + "Metadata schema assignment not found", + details={"assignment_id": assignment_id}, + ) from exc + + def list_metadata_schema_assignments(self) -> list[MetadataSchemaAssignment]: + return sorted( + self.metadata_schema_assignments.values(), + key=lambda assignment: (assignment.priority, assignment.schema_id, assignment.assignment_id), + ) + def save_context_entity(self, entity: ContextEntity) -> ContextEntity: self.context_entities[entity.entity_id] = entity return entity diff --git a/src/kontextual_engine/adapters/sqlite/asset_registry.py b/src/kontextual_engine/adapters/sqlite/asset_registry.py index 842acb9..62798f2 100644 --- a/src/kontextual_engine/adapters/sqlite/asset_registry.py +++ b/src/kontextual_engine/adapters/sqlite/asset_registry.py @@ -20,6 +20,8 @@ from kontextual_engine.core import ( KnowledgeAsset, LifecycleState, MetadataRecord, + MetadataSchema, + MetadataSchemaAssignment, RepresentationKind, RelationshipTargetKind, ) @@ -189,6 +191,74 @@ class SQLiteAssetRegistryRepository: self.get_asset(asset_id) return [MetadataRecord.from_dict(_loads(row["payload"])) for row in rows] + def save_metadata_schema(self, schema: MetadataSchema) -> MetadataSchema: + with self._connect() as conn: + conn.execute( + """ + insert into metadata_schemas (id, name, version, payload) + values (?, ?, ?, ?) + on conflict(id) do update set + name=excluded.name, + version=excluded.version, + payload=excluded.payload + """, + (schema.schema_id, schema.name, schema.version, _json(schema.to_dict())), + ) + return schema + + def get_metadata_schema(self, schema_id: str) -> MetadataSchema: + row = self._one("select payload from metadata_schemas where id = ?", (schema_id,)) + if row is None: + raise NotFoundError("Metadata schema not found", details={"schema_id": schema_id}) + return MetadataSchema.from_dict(_loads(row["payload"])) + + def list_metadata_schemas(self) -> list[MetadataSchema]: + rows = self._all("select payload from metadata_schemas order by name, id", ()) + return [MetadataSchema.from_dict(_loads(row["payload"])) for row in rows] + + def save_metadata_schema_assignment( + self, + assignment: MetadataSchemaAssignment, + ) -> MetadataSchemaAssignment: + self.get_metadata_schema(assignment.schema_id) + with self._connect() as conn: + conn.execute( + """ + insert into metadata_schema_assignments (id, schema_id, priority, payload) + values (?, ?, ?, ?) + on conflict(id) do update set + schema_id=excluded.schema_id, + priority=excluded.priority, + payload=excluded.payload + """, + ( + assignment.assignment_id, + assignment.schema_id, + assignment.priority, + _json(assignment.to_dict()), + ), + ) + return assignment + + def get_metadata_schema_assignment(self, assignment_id: str) -> MetadataSchemaAssignment: + row = self._one( + "select payload from metadata_schema_assignments where id = ?", + (assignment_id,), + ) + if row is None: + raise NotFoundError( + "Metadata schema assignment not found", + details={"assignment_id": assignment_id}, + ) + return MetadataSchemaAssignment.from_dict(_loads(row["payload"])) + + def list_metadata_schema_assignments(self) -> list[MetadataSchemaAssignment]: + rows = self._all( + "select payload from metadata_schema_assignments order by priority, schema_id, id", + (), + ) + return [MetadataSchemaAssignment.from_dict(_loads(row["payload"])) for row in rows] + def save_context_entity(self, entity: ContextEntity) -> ContextEntity: with self._connect() as conn: conn.execute( @@ -457,6 +527,18 @@ class SQLiteAssetRegistryRepository: key text not null, payload text not null ); + create table if not exists metadata_schemas ( + id text primary key, + name text not null, + version text not null, + payload text not null + ); + create table if not exists metadata_schema_assignments ( + id text primary key, + schema_id text not null references metadata_schemas(id) on delete cascade, + priority integer not null, + payload text not null + ); create table if not exists context_entities ( id text primary key, entity_type text not null, @@ -508,6 +590,7 @@ class SQLiteAssetRegistryRepository: create index if not exists idx_assets_lifecycle on assets(lifecycle); create index if not exists idx_representations_asset on representations(asset_id); create index if not exists idx_metadata_asset on metadata_records(asset_id); + create index if not exists idx_schema_assignments_schema on metadata_schema_assignments(schema_id); create index if not exists idx_entities_type on context_entities(entity_type); create index if not exists idx_relationships_source on core_relationships(source_id); create index if not exists idx_relationships_target on core_relationships(target_id); diff --git a/src/kontextual_engine/core/__init__.py b/src/kontextual_engine/core/__init__.py index 0db0aec..3aec49d 100644 --- a/src/kontextual_engine/core/__init__.py +++ b/src/kontextual_engine/core/__init__.py @@ -20,6 +20,7 @@ from .metadata import ( MetadataFieldDefinition, MetadataRecord, MetadataSchema, + MetadataSchemaAssignment, MetadataValidationIssue, MetadataValueType, Sensitivity, @@ -64,6 +65,7 @@ __all__ = [ "MetadataFieldDefinition", "MetadataRecord", "MetadataSchema", + "MetadataSchemaAssignment", "MetadataValidationIssue", "MetadataValueType", "NormalizedDocument", diff --git a/src/kontextual_engine/core/metadata.py b/src/kontextual_engine/core/metadata.py index ab76af5..ad9dab0 100644 --- a/src/kontextual_engine/core/metadata.py +++ b/src/kontextual_engine/core/metadata.py @@ -290,6 +290,63 @@ class MetadataSchema: ) +@dataclass(frozen=True) +class MetadataSchemaAssignment: + schema_id: str + asset_types: tuple[str, ...] = () + sensitivities: tuple[Sensitivity | str, ...] = () + lifecycle_states: tuple[LifecycleState | str, ...] = () + policy_ref: str | None = None + priority: int = 100 + metadata: dict[str, Any] = field(default_factory=dict) + assignment_id: str = field(default_factory=lambda: new_id("metadata_schema_assignment")) + + def __post_init__(self) -> None: + object.__setattr__(self, "asset_types", tuple(self.asset_types)) + object.__setattr__(self, "sensitivities", tuple(Sensitivity(item) for item in self.sensitivities)) + object.__setattr__( + self, + "lifecycle_states", + tuple(LifecycleState(item) for item in self.lifecycle_states), + ) + + def applies_to(self, classification: "Classification") -> bool: + if self.asset_types and classification.asset_type not in self.asset_types: + return False + if self.sensitivities and classification.sensitivity not in self.sensitivities: + return False + if self.lifecycle_states and classification.lifecycle not in self.lifecycle_states: + return False + return True + + def to_dict(self) -> dict[str, Any]: + return compact_dict( + { + "assignment_id": self.assignment_id, + "schema_id": self.schema_id, + "asset_types": list(self.asset_types), + "sensitivities": [item.value for item in self.sensitivities], + "lifecycle_states": [item.value for item in self.lifecycle_states], + "policy_ref": self.policy_ref, + "priority": self.priority, + "metadata": dict(self.metadata), + } + ) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "MetadataSchemaAssignment": + return cls( + assignment_id=data["assignment_id"], + schema_id=data["schema_id"], + asset_types=tuple(data.get("asset_types", [])), + sensitivities=tuple(Sensitivity(item) for item in data.get("sensitivities", [])), + lifecycle_states=tuple(LifecycleState(item) for item in data.get("lifecycle_states", [])), + policy_ref=data.get("policy_ref"), + priority=int(data.get("priority", 100)), + metadata=dict(data.get("metadata", {})), + ) + + @dataclass(frozen=True) class Classification: asset_type: str diff --git a/src/kontextual_engine/ports/repositories.py b/src/kontextual_engine/ports/repositories.py index a64cf4c..827495e 100644 --- a/src/kontextual_engine/ports/repositories.py +++ b/src/kontextual_engine/ports/repositories.py @@ -17,6 +17,8 @@ from kontextual_engine.core import ( KnowledgeAsset, LifecycleState, MetadataRecord, + MetadataSchema, + MetadataSchemaAssignment, RepresentationKind, ) @@ -46,6 +48,16 @@ class AssetRegistryRepository(Protocol): def save_metadata_record(self, asset_id: str, record: MetadataRecord) -> MetadataRecord: ... def list_metadata_records(self, asset_id: str) -> list[MetadataRecord]: ... + def save_metadata_schema(self, schema: MetadataSchema) -> MetadataSchema: ... + def get_metadata_schema(self, schema_id: str) -> MetadataSchema: ... + def list_metadata_schemas(self) -> list[MetadataSchema]: ... + def save_metadata_schema_assignment( + self, + assignment: MetadataSchemaAssignment, + ) -> MetadataSchemaAssignment: ... + def get_metadata_schema_assignment(self, assignment_id: str) -> MetadataSchemaAssignment: ... + def list_metadata_schema_assignments(self) -> list[MetadataSchemaAssignment]: ... + def save_context_entity(self, entity: ContextEntity) -> ContextEntity: ... def get_context_entity(self, entity_id: str) -> ContextEntity: ... def list_context_entities(self) -> list[ContextEntity]: ... diff --git a/src/kontextual_engine/services/asset_service.py b/src/kontextual_engine/services/asset_service.py index ba04ca4..c443512 100644 --- a/src/kontextual_engine/services/asset_service.py +++ b/src/kontextual_engine/services/asset_service.py @@ -18,6 +18,7 @@ from kontextual_engine.core import ( mapping_digest, MetadataRecord, MetadataSchema, + MetadataSchemaAssignment, OperationContext, PolicyDecision, RelationshipTargetKind, @@ -176,6 +177,57 @@ class AssetRegistryService: ) return AssetChangeResult(asset, version, event, decision) + def register_metadata_schema( + self, + schema: MetadataSchema, + context: OperationContext, + ) -> MetadataSchema: + decision = self._authorize( + context, + "metadata_schema.register", + f"metadata_schema:{schema.schema_id}", + resource_metadata={"schema_id": schema.schema_id, "version": schema.version}, + ) + saved = self.repository.save_metadata_schema(schema) + self._audit( + "metadata_schema.register", + f"metadata_schema:{schema.schema_id}", + AuditOutcome.SUCCESS, + context, + decision, + details={"schema_id": schema.schema_id, "version": schema.version}, + ) + return saved + + def assign_metadata_schema( + self, + assignment: MetadataSchemaAssignment, + context: OperationContext, + ) -> MetadataSchemaAssignment: + self.repository.get_metadata_schema(assignment.schema_id) + decision = self._authorize( + context, + "metadata_schema.assign", + f"metadata_schema_assignment:{assignment.assignment_id}", + resource_metadata={"schema_id": assignment.schema_id}, + ) + saved = self.repository.save_metadata_schema_assignment(assignment) + self._audit( + "metadata_schema.assign", + f"metadata_schema_assignment:{assignment.assignment_id}", + AuditOutcome.SUCCESS, + context, + decision, + details={"schema_id": assignment.schema_id, "assignment_id": assignment.assignment_id}, + ) + return saved + + def list_metadata_schemas(self) -> list[MetadataSchema]: + return self.repository.list_metadata_schemas() + + def list_metadata_schema_assignments(self) -> list[MetadataSchemaAssignment]: + return self.repository.list_metadata_schema_assignments() + def add_representation( self, asset_id: str, @@ -417,9 +469,23 @@ class AssetRegistryService: classification: Classification, records: list[MetadataRecord], ) -> None: - for schema in self.metadata_schemas: - if schema.applies_to(classification): - schema.validate_or_raise(records) + for schema in self._metadata_schemas_for(classification): + schema.validate_or_raise(records) + + def _metadata_schemas_for(self, classification: Classification) -> tuple[MetadataSchema, ...]: + selected: list[MetadataSchema] = [ + schema for schema in self.metadata_schemas if schema.applies_to(classification) + ] + seen = {schema.schema_id for schema in selected} + for assignment in self.repository.list_metadata_schema_assignments(): + if not assignment.applies_to(classification): + continue + schema = self.repository.get_metadata_schema(assignment.schema_id) + if schema.schema_id in seen or not schema.applies_to(classification): + continue + selected.append(schema) + seen.add(schema.schema_id) + return tuple(selected) def _idempotent_lookup( self, diff --git a/tests/test_asset_registry.py b/tests/test_asset_registry.py index 79b4a72..13b1676 100644 --- a/tests/test_asset_registry.py +++ b/tests/test_asset_registry.py @@ -16,6 +16,7 @@ from kontextual_engine import ( MetadataFieldDefinition, MetadataRecord, MetadataSchema, + MetadataSchemaAssignment, MetadataValueType, OperationContext, PolicyDecision, @@ -224,6 +225,60 @@ def test_asset_registry_validates_metadata_schema_before_writes() -> None: assert [record.key for record in repo.list_metadata_records(created.asset.id)] == ["owner"] +def test_asset_registry_applies_persisted_metadata_schema_assignments() -> None: + repo = InMemoryAssetRegistryRepository() + service = AssetRegistryService(repo) + context = operation_context() + schema = MetadataSchema( + schema_id="schema-policy-note-v1", + name="Policy Note Metadata", + allow_unknown=False, + fields=( + MetadataFieldDefinition("owner", MetadataValueType.STRING, required=True, require_confirmed=True), + MetadataFieldDefinition("state", MetadataValueType.STRING, allowed_values=("draft", "approved")), + ), + ) + assignment = MetadataSchemaAssignment( + assignment_id="assignment-policy-note", + schema_id=schema.schema_id, + asset_types=("policy-note",), + sensitivities=(Sensitivity.INTERNAL,), + policy_ref="local://metadata-policy/policy-note", + ) + + service.register_metadata_schema(schema, context) + service.assign_metadata_schema(assignment, context) + + with pytest.raises(ValidationError) as exc_info: + service.create_asset( + "Policy Note", + Classification(asset_type="policy-note", sensitivity=Sensitivity.INTERNAL), + context, + asset_id="asset-policy-note-invalid", + metadata_records=[MetadataRecord("state", "published")], + ) + + assert {issue["code"] for issue in exc_info.value.details["issues"]} == { + "metadata.required_missing", + "metadata.value_not_allowed", + } + assert repo.list_assets() == [] + + created = service.create_asset( + "Policy Note", + Classification(asset_type="policy-note", sensitivity=Sensitivity.INTERNAL), + context, + asset_id="asset-policy-note", + metadata_records=[ + MetadataRecord("owner", "Platform Knowledge", confirmed=True), + MetadataRecord("state", "approved", confirmed=True), + ], + ) + + assert created.asset.id == "asset-policy-note" + assert service.list_metadata_schema_assignments()[0].policy_ref == "local://metadata-policy/policy-note" + + def test_sqlite_asset_registry_survives_reinstantiation(tmp_path: Path) -> None: db_path = tmp_path / "registry.sqlite" repo = SQLiteAssetRegistryRepository(db_path) @@ -305,6 +360,67 @@ def test_sqlite_registry_persists_context_entities_relationships_and_idempotency ] +def test_sqlite_registry_persists_metadata_schemas_and_assignments(tmp_path: Path) -> None: + db_path = tmp_path / "registry.sqlite" + repo = SQLiteAssetRegistryRepository(db_path) + service = AssetRegistryService(repo) + context = operation_context() + schema = MetadataSchema( + schema_id="schema-review-v1", + name="Review Metadata", + allow_unknown=False, + fields=( + MetadataFieldDefinition("reviewer", MetadataValueType.STRING, required=True, require_confirmed=True), + MetadataFieldDefinition("score", MetadataValueType.NUMBER, min_value=0, max_value=1), + ), + ) + + service.register_metadata_schema(schema, context) + service.assign_metadata_schema( + MetadataSchemaAssignment( + assignment_id="assignment-review-documents", + schema_id=schema.schema_id, + asset_types=("review",), + ), + context, + ) + + reloaded_service = AssetRegistryService(SQLiteAssetRegistryRepository(db_path)) + + with pytest.raises(ValidationError) as exc_info: + reloaded_service.create_asset( + "Review", + Classification(asset_type="review", sensitivity=Sensitivity.INTERNAL), + context, + asset_id="asset-review-invalid", + metadata_records=[ + MetadataRecord("reviewer", "Ada", confirmed=False), + MetadataRecord("score", 1.7), + ], + ) + + assert {issue["code"] for issue in exc_info.value.details["issues"]} == { + "metadata.confirmation_required", + "metadata.value_too_large", + } + + created = reloaded_service.create_asset( + "Review", + Classification(asset_type="review", sensitivity=Sensitivity.INTERNAL), + context, + asset_id="asset-review", + metadata_records=[ + MetadataRecord("reviewer", "Ada", confirmed=True), + MetadataRecord("score", 0.92), + ], + ) + + reloaded_repo = SQLiteAssetRegistryRepository(db_path) + assert created.asset.id == "asset-review" + assert reloaded_repo.get_metadata_schema("schema-review-v1").name == "Review Metadata" + assert reloaded_repo.get_metadata_schema_assignment("assignment-review-documents").schema_id == "schema-review-v1" + + def test_sqlite_registry_enforces_representation_asset_reference(tmp_path: Path) -> None: repo = SQLiteAssetRegistryRepository(tmp_path / "registry.sqlite") representation = AssetRepresentation.from_content( diff --git a/tests/test_core_architecture.py b/tests/test_core_architecture.py index 8db59eb..10b116f 100644 --- a/tests/test_core_architecture.py +++ b/tests/test_core_architecture.py @@ -12,6 +12,7 @@ from kontextual_engine.core import ( MetadataFieldDefinition, MetadataRecord, MetadataSchema, + MetadataSchemaAssignment, MetadataValueType, OperationContext, PolicyDecision, @@ -210,3 +211,25 @@ def test_metadata_schema_reports_structured_validation_issues() -> None: assert schema.applies_to(Classification(asset_type="document")) is True assert schema.applies_to(Classification(asset_type="dataset")) is False assert MetadataSchema.from_dict(schema.to_dict()).fields[0].value_type == MetadataValueType.STRING + + +def test_metadata_schema_assignment_matches_classification_and_roundtrips() -> None: + assignment = MetadataSchemaAssignment( + assignment_id="assignment-documents", + schema_id="schema-document-v1", + asset_types=("document",), + sensitivities=(Sensitivity.INTERNAL,), + lifecycle_states=(LifecycleState.ACTIVE,), + policy_ref="local://policy/document-metadata", + ) + + assert assignment.applies_to( + Classification(asset_type="document", sensitivity=Sensitivity.INTERNAL) + ) is True + assert assignment.applies_to( + Classification(asset_type="document", sensitivity=Sensitivity.CONFIDENTIAL) + ) is False + assert ( + MetadataSchemaAssignment.from_dict(assignment.to_dict()).policy_ref + == "local://policy/document-metadata" + ) diff --git a/tests/test_markitect_tool_contract.py b/tests/test_markitect_tool_contract.py index 69d5219..fbb817b 100644 --- a/tests/test_markitect_tool_contract.py +++ b/tests/test_markitect_tool_contract.py @@ -23,6 +23,7 @@ INTERNAL = EXAMPLE_ROOT / "corpus" / "internal-risk-note.md" BUNDLE = EXAMPLE_ROOT / "composition" / "context-bundle.md" MANIFEST = EXAMPLE_ROOT / "manifests" / "agent-context.yaml" CONTRACT = EXAMPLE_ROOT / "contracts" / "decision-record.contract.md" +SCHEMA = EXAMPLE_ROOT / "schemas" / "adr-proxy.schema.md" def test_markitect_parser_returns_structured_markdown_document() -> None: @@ -170,3 +171,16 @@ def test_markitect_document_contracts_accept_valid_and_report_invalid_documents( assert invalid.valid is False assert "contract.section.missing" in invalid_codes assert "contract.section.forbidden" in invalid_codes + + +def test_markitect_schema_validation_accepts_markdown_proxy_documents() -> None: + loaded_schema = mkt.load_schema_file(SCHEMA) + schema_check = mkt.validate_schema(loaded_schema.schema) + valid = mkt.validate_markdown_file(ADR, SCHEMA) + invalid = mkt.validate_markdown_file(INVALID_ADR, SCHEMA) + + assert loaded_schema.metadata["schema-id"] == "kontextual-engine.markdown-proxy.adr.v1" + assert schema_check.valid is True + assert valid.valid is True + assert invalid.valid is False + assert any("Decision" in violation.message for violation in invalid.violations) diff --git a/workplans/KONT-WP-0005-asset-registry-governance-state.md b/workplans/KONT-WP-0005-asset-registry-governance-state.md index fb87474..8ebb88d 100644 --- a/workplans/KONT-WP-0005-asset-registry-governance-state.md +++ b/workplans/KONT-WP-0005-asset-registry-governance-state.md @@ -45,6 +45,12 @@ adapter metadata on representations or versions. It must not make Markitect document classes canonical engine entities, and asset identity must remain independent of Markitect snapshot identity. +Markdown proxy documents are valid source, normalized, or derived +representations for assets when Markitect selectors, contracts, document +schemas, or workflows are useful. They remain adapter representations under +engine governance; the registry still owns identity, metadata, lifecycle, +policy, lineage, and audit. + ## Implementation Note The first registry slice is recorded in @@ -59,11 +65,11 @@ As of 2026-05-06, the registry core has a working asset service, in-memory and SQLite repositories, policy gateway boundary, audit events, versions, representations, metadata records, context entities, asset/context relationships, idempotent asset creation, and custom metadata schema -validation before registry writes. Remaining work in this workplan is -concentrated on schema registry/policy assignment, standard metadata filtering -beyond lifecycle and asset type, restore/supersession operations, conflict -semantics beyond sequence/idempotency checks, and batch partial-failure -envelopes. +validation before registry writes. It now also includes a durable metadata +schema registry and assignment rules for policy-selected validation. Remaining +work in this workplan is concentrated on standard metadata filtering beyond +lifecycle and asset type, restore/supersession operations, conflict semantics +beyond sequence/idempotency checks, and batch partial-failure envelopes. ## G5.1 - Implement stable asset identity and source references