Markitect schema-validation integration use case and fixture for Markdown proxy documents

This commit is contained in:
2026-05-06 04:03:50 +02:00
parent c271385e35
commit dbe93be1a9
16 changed files with 518 additions and 12 deletions

View File

@@ -39,6 +39,7 @@ from .core import (
MetadataFieldDefinition,
MetadataRecord,
MetadataSchema,
MetadataSchemaAssignment,
MetadataValidationIssue,
MetadataValueType,
NormalizedDocument,
@@ -142,6 +143,7 @@ __all__ = [
"MetadataFieldDefinition",
"MetadataRecord",
"MetadataSchema",
"MetadataSchemaAssignment",
"MetadataValidationIssue",
"MetadataValueType",
"NormalizedDocument",

View File

@@ -18,6 +18,8 @@ from kontextual_engine.core import (
KnowledgeAsset,
LifecycleState,
MetadataRecord,
MetadataSchema,
MetadataSchemaAssignment,
RepresentationKind,
)
from kontextual_engine.errors import NotFoundError, ValidationError
@@ -29,6 +31,8 @@ class InMemoryAssetRegistryRepository:
assets: dict[str, KnowledgeAsset] = field(default_factory=dict)
representations: dict[str, AssetRepresentation] = field(default_factory=dict)
metadata_records: dict[str, list[MetadataRecord]] = field(default_factory=dict)
metadata_schemas: dict[str, MetadataSchema] = field(default_factory=dict)
metadata_schema_assignments: dict[str, MetadataSchemaAssignment] = field(default_factory=dict)
context_entities: dict[str, ContextEntity] = field(default_factory=dict)
relationships: dict[str, CoreRelationship] = field(default_factory=dict)
versions: dict[str, list[AssetVersion]] = field(default_factory=dict)
@@ -105,6 +109,42 @@ class InMemoryAssetRegistryRepository:
self.get_asset(asset_id)
return list(self.metadata_records.get(asset_id, []))
def save_metadata_schema(self, schema: MetadataSchema) -> MetadataSchema:
    """Store ``schema`` keyed by its ``schema_id`` and return it.

    A previously stored schema with the same id is replaced.
    """
    registry = self.metadata_schemas
    registry[schema.schema_id] = schema
    return schema
def get_metadata_schema(self, schema_id: str) -> MetadataSchema:
    """Look up the schema stored under ``schema_id``.

    Raises:
        NotFoundError: when no schema is stored under that id; the original
            ``KeyError`` is attached as the cause.
    """
    try:
        found = self.metadata_schemas[schema_id]
    except KeyError as missing:
        raise NotFoundError("Metadata schema not found", details={"schema_id": schema_id}) from missing
    else:
        return found
def list_metadata_schemas(self) -> list[MetadataSchema]:
    """Return every stored schema as a new list ordered by name, then schema id."""
    ordered = list(self.metadata_schemas.values())
    ordered.sort(key=lambda entry: (entry.name, entry.schema_id))
    return ordered
def save_metadata_schema_assignment(
    self,
    assignment: MetadataSchemaAssignment,
) -> MetadataSchemaAssignment:
    """Store ``assignment`` keyed by its ``assignment_id`` and return it.

    The referenced schema is looked up first through ``get_metadata_schema``;
    whatever that lookup raises for an unknown schema propagates and nothing
    is stored.
    """
    # Existence check only; the returned schema itself is not needed here.
    self.get_metadata_schema(assignment.schema_id)
    store = self.metadata_schema_assignments
    store[assignment.assignment_id] = assignment
    return assignment
def get_metadata_schema_assignment(self, assignment_id: str) -> MetadataSchemaAssignment:
    """Look up the assignment stored under ``assignment_id``.

    Raises:
        NotFoundError: when no assignment is stored under that id; the
            original ``KeyError`` is attached as the cause.
    """
    try:
        found = self.metadata_schema_assignments[assignment_id]
    except KeyError as missing:
        raise NotFoundError(
            "Metadata schema assignment not found",
            details={"assignment_id": assignment_id},
        ) from missing
    else:
        return found
def list_metadata_schema_assignments(self) -> list[MetadataSchemaAssignment]:
    """Return every stored assignment as a new list.

    Ordering is ascending by priority, then schema id, then assignment id.
    """
    ordered = list(self.metadata_schema_assignments.values())
    ordered.sort(key=lambda entry: (entry.priority, entry.schema_id, entry.assignment_id))
    return ordered
def save_context_entity(self, entity: ContextEntity) -> ContextEntity:
self.context_entities[entity.entity_id] = entity
return entity

View File

@@ -20,6 +20,8 @@ from kontextual_engine.core import (
KnowledgeAsset,
LifecycleState,
MetadataRecord,
MetadataSchema,
MetadataSchemaAssignment,
RepresentationKind,
RelationshipTargetKind,
)
@@ -189,6 +191,74 @@ class SQLiteAssetRegistryRepository:
self.get_asset(asset_id)
return [MetadataRecord.from_dict(_loads(row["payload"])) for row in rows]
def save_metadata_schema(self, schema: MetadataSchema) -> MetadataSchema:
    """Insert ``schema`` into ``metadata_schemas`` or overwrite the row with the same id.

    The id, name and version go into their own columns; the result of
    ``schema.to_dict()`` is serialized with ``_json`` into ``payload``.
    Returns the ``schema`` object that was passed in.
    """
    # The statement text is left-aligned so the literal stays unchanged.
    upsert_statement = """
insert into metadata_schemas (id, name, version, payload)
values (?, ?, ?, ?)
on conflict(id) do update set
name=excluded.name,
version=excluded.version,
payload=excluded.payload
"""
    with self._connect() as connection:
        row_values = (schema.schema_id, schema.name, schema.version, _json(schema.to_dict()))
        connection.execute(upsert_statement, row_values)
    return schema
def get_metadata_schema(self, schema_id: str) -> MetadataSchema:
    """Load the schema whose row id equals ``schema_id``.

    The stored ``payload`` column is decoded with ``_loads`` and rebuilt via
    ``MetadataSchema.from_dict``.

    Raises:
        NotFoundError: when the query yields no row.
    """
    found = self._one("select payload from metadata_schemas where id = ?", (schema_id,))
    if found is not None:
        return MetadataSchema.from_dict(_loads(found["payload"]))
    raise NotFoundError("Metadata schema not found", details={"schema_id": schema_id})
def list_metadata_schemas(self) -> list[MetadataSchema]:
    """Return all stored schemas, ordered by the ``name`` and then ``id`` columns."""
    schemas: list[MetadataSchema] = []
    for record in self._all("select payload from metadata_schemas order by name, id", ()):
        schemas.append(MetadataSchema.from_dict(_loads(record["payload"])))
    return schemas
def save_metadata_schema_assignment(
    self,
    assignment: MetadataSchemaAssignment,
) -> MetadataSchemaAssignment:
    """Insert ``assignment`` or overwrite the row with the same assignment id.

    The referenced schema is fetched first through ``get_metadata_schema``,
    so an unknown ``schema_id`` fails there and no row is written. The
    ``schema_id`` and ``priority`` are stored in their own columns next to the
    serialized ``assignment.to_dict()`` payload. Returns the ``assignment``
    object that was passed in.
    """
    # Existence check only; the loaded schema is discarded.
    self.get_metadata_schema(assignment.schema_id)
    # The statement text is left-aligned so the literal stays unchanged.
    upsert_statement = """
insert into metadata_schema_assignments (id, schema_id, priority, payload)
values (?, ?, ?, ?)
on conflict(id) do update set
schema_id=excluded.schema_id,
priority=excluded.priority,
payload=excluded.payload
"""
    with self._connect() as connection:
        row_values = (
            assignment.assignment_id,
            assignment.schema_id,
            assignment.priority,
            _json(assignment.to_dict()),
        )
        connection.execute(upsert_statement, row_values)
    return assignment
def get_metadata_schema_assignment(self, assignment_id: str) -> MetadataSchemaAssignment:
    """Load the assignment whose row id equals ``assignment_id``.

    The stored ``payload`` column is decoded with ``_loads`` and rebuilt via
    ``MetadataSchemaAssignment.from_dict``.

    Raises:
        NotFoundError: when the query yields no row.
    """
    found = self._one(
        "select payload from metadata_schema_assignments where id = ?",
        (assignment_id,),
    )
    if found is not None:
        return MetadataSchemaAssignment.from_dict(_loads(found["payload"]))
    raise NotFoundError(
        "Metadata schema assignment not found",
        details={"assignment_id": assignment_id},
    )
def list_metadata_schema_assignments(self) -> list[MetadataSchemaAssignment]:
    """Return all stored assignments.

    Rows are ordered by the ``priority``, ``schema_id`` and ``id`` columns, in
    that order.
    """
    records = self._all(
        "select payload from metadata_schema_assignments order by priority, schema_id, id",
        (),
    )
    assignments: list[MetadataSchemaAssignment] = []
    for record in records:
        assignments.append(MetadataSchemaAssignment.from_dict(_loads(record["payload"])))
    return assignments
def save_context_entity(self, entity: ContextEntity) -> ContextEntity:
with self._connect() as conn:
conn.execute(
@@ -457,6 +527,18 @@ class SQLiteAssetRegistryRepository:
key text not null,
payload text not null
);
create table if not exists metadata_schemas (
id text primary key,
name text not null,
version text not null,
payload text not null
);
create table if not exists metadata_schema_assignments (
id text primary key,
schema_id text not null references metadata_schemas(id) on delete cascade,
priority integer not null,
payload text not null
);
create table if not exists context_entities (
id text primary key,
entity_type text not null,
@@ -508,6 +590,7 @@ class SQLiteAssetRegistryRepository:
create index if not exists idx_assets_lifecycle on assets(lifecycle);
create index if not exists idx_representations_asset on representations(asset_id);
create index if not exists idx_metadata_asset on metadata_records(asset_id);
create index if not exists idx_schema_assignments_schema on metadata_schema_assignments(schema_id);
create index if not exists idx_entities_type on context_entities(entity_type);
create index if not exists idx_relationships_source on core_relationships(source_id);
create index if not exists idx_relationships_target on core_relationships(target_id);

View File

@@ -20,6 +20,7 @@ from .metadata import (
MetadataFieldDefinition,
MetadataRecord,
MetadataSchema,
MetadataSchemaAssignment,
MetadataValidationIssue,
MetadataValueType,
Sensitivity,
@@ -64,6 +65,7 @@ __all__ = [
"MetadataFieldDefinition",
"MetadataRecord",
"MetadataSchema",
"MetadataSchemaAssignment",
"MetadataValidationIssue",
"MetadataValueType",
"NormalizedDocument",

View File

@@ -290,6 +290,63 @@ class MetadataSchema:
)
@dataclass(frozen=True)
class MetadataSchemaAssignment:
    """Rule that attaches a metadata schema to the classifications it matches.

    ``asset_types``, ``sensitivities`` and ``lifecycle_states`` each act as an
    allow-list. An empty tuple places no restriction on that dimension, so an
    assignment with all three empty matches every classification.
    """

    # Identifier of the schema this assignment refers to.
    schema_id: str
    # Allowed ``Classification.asset_type`` values; empty means "any".
    asset_types: tuple[str, ...] = ()
    # Allowed sensitivities; entries are coerced to ``Sensitivity`` on construction.
    sensitivities: tuple[Sensitivity | str, ...] = ()
    # Allowed lifecycle states; entries are coerced to ``LifecycleState`` on construction.
    lifecycle_states: tuple[LifecycleState | str, ...] = ()
    # Optional policy reference; stored and serialized but not interpreted by this class.
    policy_ref: str | None = None
    # Ordering key for assignments.
    # NOTE(review): presumably lower values are considered first - confirm against callers.
    priority: int = 100
    # Free-form extra data; copied on construction so the caller's dict is not shared.
    metadata: dict[str, Any] = field(default_factory=dict)
    # Generated through ``new_id`` unless supplied explicitly.
    assignment_id: str = field(default_factory=lambda: new_id("metadata_schema_assignment"))

    def __post_init__(self) -> None:
        """Normalize collection fields after dataclass initialisation.

        ``object.__setattr__`` is required because the dataclass is frozen.
        """
        asset_types = self.asset_types
        if isinstance(asset_types, str):
            # A bare string would otherwise be split into single characters by tuple().
            asset_types = (asset_types,)
        object.__setattr__(self, "asset_types", tuple(asset_types))
        object.__setattr__(self, "sensitivities", tuple(Sensitivity(item) for item in self.sensitivities))
        object.__setattr__(
            self,
            "lifecycle_states",
            tuple(LifecycleState(item) for item in self.lifecycle_states),
        )
        # Defensive copy: a frozen instance must not change when the caller
        # later mutates the dict that was passed in.
        object.__setattr__(self, "metadata", dict(self.metadata))

    def applies_to(self, classification: "Classification") -> bool:
        """Return True when ``classification`` passes every non-empty allow-list."""
        if self.asset_types and classification.asset_type not in self.asset_types:
            return False
        if self.sensitivities and classification.sensitivity not in self.sensitivities:
            return False
        if self.lifecycle_states and classification.lifecycle not in self.lifecycle_states:
            return False
        return True

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain mapping; enum members are written as their ``value``.

        The mapping is passed through ``compact_dict`` (defined elsewhere), so
        which keys survive in the output depends on that helper.
        """
        return compact_dict(
            {
                "assignment_id": self.assignment_id,
                "schema_id": self.schema_id,
                "asset_types": list(self.asset_types),
                "sensitivities": [item.value for item in self.sensitivities],
                "lifecycle_states": [item.value for item in self.lifecycle_states],
                "policy_ref": self.policy_ref,
                "priority": self.priority,
                "metadata": dict(self.metadata),
            }
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "MetadataSchemaAssignment":
        """Rebuild an assignment from a mapping such as the one ``to_dict`` produces.

        ``assignment_id`` and ``schema_id`` are required (``KeyError`` when
        absent). Every other key falls back to the same default the
        constructor uses; ``priority`` is converted with ``int``.
        """
        # Tuple conversion, enum coercion and the metadata copy all happen in
        # __post_init__, so the raw values are handed over unchanged.
        return cls(
            assignment_id=data["assignment_id"],
            schema_id=data["schema_id"],
            asset_types=data.get("asset_types", ()),
            sensitivities=data.get("sensitivities", ()),
            lifecycle_states=data.get("lifecycle_states", ()),
            policy_ref=data.get("policy_ref"),
            priority=int(data.get("priority", 100)),
            metadata=data.get("metadata", {}),
        )
@dataclass(frozen=True)
class Classification:
asset_type: str

View File

@@ -17,6 +17,8 @@ from kontextual_engine.core import (
KnowledgeAsset,
LifecycleState,
MetadataRecord,
MetadataSchema,
MetadataSchemaAssignment,
RepresentationKind,
)
@@ -46,6 +48,16 @@ class AssetRegistryRepository(Protocol):
# Store a metadata record for the asset ``asset_id``.
# NOTE(review): presumably returns the stored record - no implementation is visible here.
def save_metadata_record(self, asset_id: str, record: MetadataRecord) -> MetadataRecord: ...
# List the metadata records held for the asset ``asset_id``.
def list_metadata_records(self, asset_id: str) -> list[MetadataRecord]: ...
# Store ``schema`` under its ``schema_id``; the implementations in this change
# overwrite an existing entry with the same id and return the passed schema.
def save_metadata_schema(self, schema: MetadataSchema) -> MetadataSchema: ...
# Fetch one schema by id; the implementations in this change raise
# NotFoundError when the id is unknown.
def get_metadata_schema(self, schema_id: str) -> MetadataSchema: ...
# List all schemas; the implementations in this change order by name, then id.
def list_metadata_schemas(self) -> list[MetadataSchema]: ...
# Store ``assignment`` under its ``assignment_id``; the implementations in this
# change first look up the referenced schema, so an unknown ``schema_id`` fails.
def save_metadata_schema_assignment(
self,
assignment: MetadataSchemaAssignment,
) -> MetadataSchemaAssignment: ...
# Fetch one assignment by id; the implementations in this change raise
# NotFoundError when the id is unknown.
def get_metadata_schema_assignment(self, assignment_id: str) -> MetadataSchemaAssignment: ...
# List all assignments; the implementations in this change order by priority,
# then schema id, then assignment id.
def list_metadata_schema_assignments(self) -> list[MetadataSchemaAssignment]: ...
# Store a context entity.
# NOTE(review): presumably keyed by ``entity.entity_id`` - confirm.
def save_context_entity(self, entity: ContextEntity) -> ContextEntity: ...
# Fetch one context entity by id.
def get_context_entity(self, entity_id: str) -> ContextEntity: ...
# List all stored context entities.
def list_context_entities(self) -> list[ContextEntity]: ...

View File

@@ -18,6 +18,7 @@ from kontextual_engine.core import (
mapping_digest,
MetadataRecord,
MetadataSchema,
MetadataSchemaAssignment,
OperationContext,
PolicyDecision,
RelationshipTargetKind,
@@ -176,6 +177,57 @@ class AssetRegistryService:
)
return AssetChangeResult(asset, version, event, decision)
def register_metadata_schema(
    self,
    schema: MetadataSchema,
    context: OperationContext,
) -> MetadataSchema:
    """Authorize, persist and audit the registration of ``schema``.

    The steps run in this order: ``_authorize`` for the
    ``metadata_schema.register`` action, ``repository.save_metadata_schema``,
    then ``_audit`` with a success outcome and the authorization decision.

    Returns:
        Whatever the repository returns from ``save_metadata_schema``.
    """
    action = "metadata_schema.register"
    resource = f"metadata_schema:{schema.schema_id}"
    decision = self._authorize(
        context,
        action,
        resource,
        resource_metadata={"schema_id": schema.schema_id, "version": schema.version},
    )
    saved = self.repository.save_metadata_schema(schema)
    # The audit entry is written only after the save has succeeded.
    self._audit(
        action,
        resource,
        AuditOutcome.SUCCESS,
        context,
        decision,
        details={"schema_id": schema.schema_id, "version": schema.version},
    )
    return saved
def assign_metadata_schema(
    self,
    assignment: MetadataSchemaAssignment,
    context: OperationContext,
) -> MetadataSchemaAssignment:
    """Verify, authorize, persist and audit a schema assignment.

    The steps run in this order: the referenced schema is fetched from the
    repository (so an unknown ``schema_id`` fails first), ``_authorize`` is
    called for the ``metadata_schema.assign`` action, the assignment is saved,
    and ``_audit`` records a success outcome with the authorization decision.

    Returns:
        Whatever the repository returns from ``save_metadata_schema_assignment``.
    """
    schema_id = assignment.schema_id
    assignment_id = assignment.assignment_id
    # NOTE(review): this lookup happens before authorization, so a caller can
    # learn whether a schema id exists before any access decision is made -
    # confirm that ordering is intended.
    self.repository.get_metadata_schema(schema_id)
    action = "metadata_schema.assign"
    resource = f"metadata_schema_assignment:{assignment_id}"
    decision = self._authorize(
        context,
        action,
        resource,
        resource_metadata={"schema_id": schema_id},
    )
    saved = self.repository.save_metadata_schema_assignment(assignment)
    self._audit(
        action,
        resource,
        AuditOutcome.SUCCESS,
        context,
        decision,
        details={"schema_id": schema_id, "assignment_id": assignment_id},
    )
    return saved
def list_metadata_schemas(self) -> list[MetadataSchema]:
    """Return the repository's schema listing unchanged (no authorization or audit step)."""
    schemas = self.repository.list_metadata_schemas()
    return schemas
def list_metadata_schema_assignments(self) -> list[MetadataSchemaAssignment]:
    """Return the repository's assignment listing unchanged (no authorization or audit step)."""
    assignments = self.repository.list_metadata_schema_assignments()
    return assignments
def add_representation(
self,
asset_id: str,
@@ -417,9 +469,23 @@ class AssetRegistryService:
classification: Classification,
records: list[MetadataRecord],
) -> None:
for schema in self.metadata_schemas:
if schema.applies_to(classification):
schema.validate_or_raise(records)
for schema in self._metadata_schemas_for(classification):
schema.validate_or_raise(records)
def _metadata_schemas_for(self, classification: Classification) -> tuple[MetadataSchema, ...]:
    """Collect the schemas that must validate metadata for ``classification``.

    Schemas from ``self.metadata_schemas`` that apply come first, in their
    existing order. They are followed by schemas referenced from repository
    assignments, in the order the repository lists the assignments. Such a
    schema is included only when both the assignment and the schema itself
    apply to ``classification``. Each schema id appears at most once.

    A schema id that is already selected, or that was already loaded and
    rejected, is not fetched from the repository again.

    Returns:
        The selected schemas as a tuple.
    """
    selected: list[MetadataSchema] = [
        schema for schema in self.metadata_schemas if schema.applies_to(classification)
    ]
    seen = {schema.schema_id for schema in selected}
    # Ids that were loaded once and found not to qualify; the outcome cannot
    # change within this call, so they are not fetched a second time.
    rejected: set[str] = set()
    for assignment in self.repository.list_metadata_schema_assignments():
        schema_id = assignment.schema_id
        if schema_id in seen or schema_id in rejected:
            continue
        if not assignment.applies_to(classification):
            continue
        schema = self.repository.get_metadata_schema(schema_id)
        if schema.schema_id in seen or not schema.applies_to(classification):
            rejected.add(schema_id)
            continue
        selected.append(schema)
        seen.add(schema.schema_id)
    return tuple(selected)
def _idempotent_lookup(
self,