CMIS domain mapper

This commit is contained in:
2026-05-07 00:57:12 +02:00
parent 241522e74d
commit 41da61896b
6 changed files with 571 additions and 3 deletions

View File

@@ -2,7 +2,7 @@
Date: 2026-05-06
Status: first implementation slice started.
Status: profile and mapper slices implemented.
## Implemented Slice
@@ -14,6 +14,8 @@ boundary used by the future API adapter:
- `CMISAction`
- `CMISAccessProfile`
- `CMISAccessPoint`
- `CMISDomainMapper`
- `CMISObjectProjection`
The layer is intentionally small. It decides whether a CMIS action is allowed
for a profile and whether an engine asset may be exposed through an access
@@ -46,3 +48,20 @@ Decisions return existing `PolicyDecision` objects so later CMIS routes can
emit compatible diagnostics and audit records without inventing another policy
model.
## Mapper Slice
`CMISDomainMapper` projects existing engine state into CMIS-shaped envelopes:
- repository info and CMIS 1.1 Browser Binding capability flags,
- base type definitions for document, folder, relationship, policy, item, and
secondary,
- engine assets as CMIS document projections,
- representation metadata as content stream descriptors,
- asset versions as CMIS version properties,
- relationship primitives as CMIS relationship objects,
- profile-derived allowable actions.
The mapper returns `None` for assets or relationships that the access-point
profile must not expose. It does not fetch from repositories directly; callers
provide the asset, representations, versions, metadata records, and
relationships they have already authorized or loaded.

View File

@@ -26,8 +26,11 @@ from .core import (
CMISAccessPoint,
CMISAccessProfile,
CMISAction,
CMISBaseType,
CMISBinding,
CMISCapability,
CMISDomainMapper,
CMISObjectProjection,
ConnectorCapability,
ContextEntity,
ContextEntityType,
@@ -175,8 +178,11 @@ __all__ = [
"CMISAccessPoint",
"CMISAccessProfile",
"CMISAction",
"CMISBaseType",
"CMISBinding",
"CMISCapability",
"CMISDomainMapper",
"CMISObjectProjection",
"ConnectorCapability",
"Collection",
"ContextAssembler",

View File

@@ -9,6 +9,9 @@ from .cmis import (
CMISAction,
CMISBinding,
CMISCapability,
CMISBaseType,
CMISDomainMapper,
CMISObjectProjection,
)
from .idempotency import IdempotencyRecord, IdempotencyStatus
from .ingestion import (
@@ -77,8 +80,11 @@ __all__ = [
"CMISAccessPoint",
"CMISAccessProfile",
"CMISAction",
"CMISBaseType",
"CMISBinding",
"CMISCapability",
"CMISDomainMapper",
"CMISObjectProjection",
"ConnectorCapability",
"ContextEntity",
"ContextEntityType",

View File

@@ -11,9 +11,11 @@ from enum import Enum
from typing import Any
from .actors import ActorType, OperationContext
from .assets import KnowledgeAsset
from .assets import AssetRepresentation, KnowledgeAsset, RepresentationKind
from .metadata import Sensitivity
from .policy import PolicyDecision
from .provenance import AssetVersion
from .relationships import CoreRelationship, RelationshipTargetKind
from .primitives import compact_dict
@@ -60,6 +62,15 @@ class CMISAction(str, Enum):
BULK_UPDATE_PROPERTIES = "bulk_update_properties"
class CMISBaseType(str, Enum):
    """Identifiers of the CMIS base object types used by the mapper.

    Members are ``str`` subclasses, so each compares equal to its wire value
    (for example ``CMISBaseType.DOCUMENT == "cmis:document"``).
    """

    # Declaration order is the iteration order; keep it stable.
    DOCUMENT = "cmis:document"
    FOLDER = "cmis:folder"
    RELATIONSHIP = "cmis:relationship"
    POLICY = "cmis:policy"
    ITEM = "cmis:item"
    SECONDARY = "cmis:secondary"
ACTION_CAPABILITIES: dict[CMISAction, CMISCapability] = {
CMISAction.GET_REPOSITORY_INFO: CMISCapability.REPOSITORY,
CMISAction.GET_TYPE_DEFINITION: CMISCapability.TYPE_DEFINITIONS,
@@ -408,6 +419,280 @@ class CMISAccessPoint:
)
@dataclass(frozen=True)
class CMISObjectProjection:
    """Immutable CMIS-shaped envelope describing one projected object."""

    object_id: str
    base_type_id: CMISBaseType
    type_id: str
    name: str
    properties: dict[str, Any]
    allowable_actions: tuple[CMISAction, ...] = ()
    path: str | None = None
    content_stream: dict[str, Any] | None = None
    version: dict[str, Any] | None = None
    relationships: tuple[str, ...] = ()

    def to_dict(self) -> dict[str, Any]:
        """Serialize the projection into plain containers.

        Enum members are replaced by their ``.value``; dicts and tuples are
        copied so the caller cannot mutate the projection through the result.
        The payload is passed through ``compact_dict`` before it is returned.
        NOTE(review): ``compact_dict`` presumably drops empty entries —
        confirm against its definition.
        """
        stream = self.content_stream if self.content_stream else {}
        version = self.version if self.version else {}

        payload: dict[str, Any] = {}
        payload["object_id"] = self.object_id
        payload["base_type_id"] = self.base_type_id.value
        payload["type_id"] = self.type_id
        payload["name"] = self.name
        payload["path"] = self.path
        payload["properties"] = dict(self.properties)
        payload["allowable_actions"] = [entry.value for entry in self.allowable_actions]
        payload["content_stream"] = dict(stream)
        payload["version"] = dict(version)
        payload["relationships"] = list(self.relationships)
        return compact_dict(payload)
class CMISDomainMapper:
    """Project engine domain objects into CMIS-shaped envelopes.

    The mapper is bound to one ``CMISAccessPoint`` and asks it for every
    exposure and action decision. It performs no lookups of its own: assets,
    representations, versions, metadata records and relationship ids are all
    supplied by the caller.
    """

    def __init__(self, access_point: CMISAccessPoint) -> None:
        self.access_point = access_point

    def repository_info(self) -> dict[str, Any]:
        """Return the repository-info envelope for the bound access point.

        The repository name is read from the access point metadata and falls
        back to the repository id when ``repository_name`` is not set.
        """
        profile = self.access_point.profile
        return {
            "repository_id": self.access_point.repository_id,
            "repository_name": self.access_point.metadata.get("repository_name", self.access_point.repository_id),
            "cmis_version_supported": "1.1",
            "root_folder_id": self.access_point.root_folder_id,
            "principal_anonymous": "anonymous",
            "principal_anyone": "anyone",
            "product_name": "kontextual-engine",
            "binding": profile.binding.value,
            "capabilities": self.capability_flags(),
            "profile": profile.name,
        }

    def capability_flags(self) -> dict[str, Any]:
        """Return capability flags derived from the profile's capabilities.

        Filing and private-working-copy flags are always ``False`` and
        ``capability_join`` is always ``"none"``; the remaining flags depend
        on ``profile.has_capability``.
        """
        profile = self.access_point.profile
        return {
            "capability_content_stream_updatability": (
                "anytime" if profile.has_capability(CMISCapability.CONTENT_STREAM_WRITE) else "none"
            ),
            "capability_changes": "objectidsonly"
            if profile.has_capability(CMISCapability.CHANGE_LOG)
            else "none",
            "capability_renditions": "read" if profile.has_capability(CMISCapability.RENDITIONS) else "none",
            "capability_get_descendants": profile.has_capability(CMISCapability.NAVIGATION),
            "capability_get_folder_tree": profile.has_capability(CMISCapability.NAVIGATION),
            "capability_multifiling": False,
            "capability_unfiling": False,
            "capability_version_specific_filing": False,
            "capability_pwc_searchable": False,
            "capability_pwc_updatable": False,
            "capability_all_versions_searchable": profile.has_capability(CMISCapability.VERSIONING),
            "capability_query": "metadataonly"
            if profile.has_capability(CMISCapability.DISCOVERY_QUERY)
            else "none",
            "capability_join": "none",
            "capability_acl": "discover" if profile.has_capability(CMISCapability.ACL) else "none",
        }

    def type_definitions(self) -> list[dict[str, Any]]:
        """Return one type definition per CMIS base type.

        Only the document and relationship definitions receive the profile's
        ``allow_mutations`` flag; every other type is built with ``False``.
        """
        can_write = self.access_point.profile.allow_mutations
        return [
            _type_definition(CMISBaseType.DOCUMENT, "kontextual:document", "Kontextual Document", can_write),
            _type_definition(CMISBaseType.FOLDER, "kontextual:folder", "Kontextual Folder", False),
            _type_definition(
                CMISBaseType.RELATIONSHIP,
                "kontextual:relationship",
                "Kontextual Relationship",
                can_write,
            ),
            _type_definition(CMISBaseType.POLICY, "kontextual:policy", "Kontextual Policy", False),
            _type_definition(CMISBaseType.ITEM, "kontextual:item", "Kontextual Item", False),
            _type_definition(CMISBaseType.SECONDARY, "kontextual:secondary", "Kontextual Secondary", False),
        ]

    def map_asset(
        self,
        asset: KnowledgeAsset,
        context: OperationContext,
        *,
        representations: list[AssetRepresentation] | tuple[AssetRepresentation, ...] = (),
        versions: list[AssetVersion] | tuple[AssetVersion, ...] = (),
        relationship_ids: list[str] | tuple[str, ...] = (),
        metadata_records: list[Any] | tuple[Any, ...] = (),
    ) -> CMISObjectProjection | None:
        """Project an asset as a CMIS document.

        Args:
            asset: The asset to project.
            context: Operation context used for exposure and action decisions.
            representations: Candidates for the content stream descriptor.
            versions: Known versions of the asset.
            relationship_ids: Relationship object ids copied onto the projection.
            metadata_records: Records merged into the property map.

        Returns:
            The projection, or ``None`` when ``access_point.exposes_asset``
            rejects the asset for this context.
        """
        if not self.access_point.exposes_asset(asset, context):
            return None
        current_version = _current_version(asset, versions)
        content_stream = self.map_content_stream(asset, representations)
        return CMISObjectProjection(
            object_id=self.asset_object_id(asset.id),
            base_type_id=CMISBaseType.DOCUMENT,
            type_id=f"kontextual:{asset.classification.asset_type}",
            name=asset.title,
            path=self.asset_path(asset),
            properties=self.asset_properties(asset, metadata_records=metadata_records),
            allowable_actions=self.allowable_actions(context, has_content_stream=content_stream is not None),
            content_stream=content_stream,
            version=self.version_properties(asset, current_version, versions),
            relationships=tuple(relationship_ids),
        )

    def map_relationship(
        self,
        relationship: CoreRelationship,
        context: OperationContext,
    ) -> CMISObjectProjection | None:
        """Project a relationship as a CMIS relationship object.

        Returns ``None`` when the access point denies ``GET_RELATIONSHIPS``
        for this relationship. The target id uses the asset object-id scheme
        only for asset targets; other targets get a ``cmis:entity:`` id.
        """
        resource = f"relationship:{relationship.relationship_id}"
        decision = self.access_point.decide_action(
            CMISAction.GET_RELATIONSHIPS,
            context,
            resource=resource,
        )
        if not decision.allowed:
            return None

        # GET_RELATIONSHIPS was granted above. GET_OBJECT is advertised only
        # when the access point grants it too, matching how allowable_actions()
        # derives the action list for assets.
        allowed_actions = [CMISAction.GET_RELATIONSHIPS]
        if self.access_point.decide_action(CMISAction.GET_OBJECT, context, resource=resource).allowed:
            allowed_actions.insert(0, CMISAction.GET_OBJECT)

        source_id = self.asset_object_id(relationship.source_id)
        target_id = (
            self.asset_object_id(relationship.target_id)
            if relationship.target_kind == RelationshipTargetKind.ASSET
            else f"cmis:entity:{relationship.target_id}"
        )
        return CMISObjectProjection(
            object_id=f"cmis:relationship:{relationship.relationship_id}",
            base_type_id=CMISBaseType.RELATIONSHIP,
            type_id="kontextual:relationship",
            name=relationship.predicate,
            properties={
                "cmis:objectId": f"cmis:relationship:{relationship.relationship_id}",
                "cmis:name": relationship.predicate,
                "cmis:baseTypeId": CMISBaseType.RELATIONSHIP.value,
                "cmis:objectTypeId": "kontextual:relationship",
                "cmis:sourceId": source_id,
                "cmis:targetId": target_id,
                "kontextual:predicate": relationship.predicate,
                "kontextual:confidence": relationship.confidence,
                "kontextual:targetKind": relationship.target_kind.value,
            },
            allowable_actions=tuple(allowed_actions),
        )

    def asset_object_id(self, asset_id: str) -> str:
        """Return the CMIS object id for an engine asset id."""
        return f"cmis:asset:{asset_id}"

    def asset_path(self, asset: KnowledgeAsset) -> str:
        """Return the normalized virtual path for an asset.

        Precedence: an explicit ``cmis_path`` metadata entry, then the first
        source reference (its path, else its external id), then the first
        topic, and finally the asset type.
        """
        explicit = asset.metadata.get("cmis_path")
        if explicit:
            return _normalize_path(str(explicit))
        if asset.source_refs:
            # Only the first source reference is used to place the asset.
            source_ref = asset.source_refs[0]
            source_root = _safe_path_segment(source_ref.source_system)
            if source_ref.path:
                return _normalize_path(f"/sources/{source_root}/{source_ref.path}")
            if source_ref.external_id:
                return _normalize_path(f"/sources/{source_root}/{source_ref.external_id}")
        topics = asset.classification.topics
        if topics:
            return _normalize_path(f"/topics/{topics[0]}/{asset.id}")
        return _normalize_path(f"/assets/{asset.classification.asset_type}/{asset.id}")

    def asset_properties(
        self,
        asset: KnowledgeAsset,
        *,
        metadata_records: list[Any] | tuple[Any, ...] = (),
    ) -> dict[str, Any]:
        """Return the CMIS and ``kontextual:`` property map for an asset.

        Each metadata record that has a truthy ``key`` attribute adds a
        ``kontextual:metadata:<key>`` entry holding its ``value`` attribute.
        The map is passed through ``compact_dict`` before it is returned.
        """
        classification = asset.classification
        properties = {
            "cmis:objectId": self.asset_object_id(asset.id),
            "cmis:name": asset.title,
            "cmis:baseTypeId": CMISBaseType.DOCUMENT.value,
            "cmis:objectTypeId": f"kontextual:{classification.asset_type}",
            "cmis:createdBy": asset.metadata.get("created_by"),
            "cmis:lastModifiedBy": asset.metadata.get("updated_by"),
            "cmis:creationDate": asset.created_at,
            "cmis:lastModificationDate": asset.updated_at,
            "cmis:changeToken": asset.current_version_id,
            "kontextual:assetId": asset.id,
            "kontextual:assetType": classification.asset_type,
            "kontextual:sensitivity": _enum_value(classification.sensitivity),
            "kontextual:lifecycle": _enum_value(asset.lifecycle),
            "kontextual:owner": classification.owner,
            "kontextual:topics": list(classification.topics),
            "kontextual:reviewState": classification.review_state,
        }
        for record in metadata_records:
            # Records are duck-typed; anything without a usable key is skipped.
            key = getattr(record, "key", None)
            if key:
                properties[f"kontextual:metadata:{key}"] = getattr(record, "value", None)
        return compact_dict(properties)

    def map_content_stream(
        self,
        asset: KnowledgeAsset,
        representations: list[AssetRepresentation] | tuple[AssetRepresentation, ...],
    ) -> dict[str, Any] | None:
        """Describe the preferred representation as a content stream.

        Returns ``None`` when no representation is available. The file name
        comes from the ``file_name`` metadata entry and falls back to the
        asset title when that key is absent.
        """
        representation = _preferred_representation(representations)
        if representation is None:
            return None
        return compact_dict(
            {
                "stream_id": representation.representation_id,
                "file_name": asset.metadata.get("file_name", asset.title),
                "mime_type": representation.media_type,
                "length": representation.size_bytes,
                "digest": representation.digest,
                "storage_ref": representation.storage_ref,
                "kind": representation.kind.value,
            }
        )

    def version_properties(
        self,
        asset: KnowledgeAsset,
        current_version: AssetVersion | None,
        versions: list[AssetVersion] | tuple[AssetVersion, ...],
    ) -> dict[str, Any]:
        """Return CMIS version properties for an asset.

        Without a current version the asset is reported as a single latest
        major version labelled ``"1"``. Otherwise the version is "latest" when
        its sequence equals the highest sequence in ``versions``; every
        version is reported as a major version.
        """
        if current_version is None:
            return {
                "cmis:isLatestVersion": True,
                "cmis:isMajorVersion": True,
                "cmis:isLatestMajorVersion": True,
                "cmis:versionSeriesId": f"cmis:version-series:{asset.id}",
                "cmis:versionLabel": "1",
            }
        # With no version list, the current version counts as the latest one.
        latest_sequence = max((version.sequence for version in versions), default=current_version.sequence)
        return {
            "cmis:isLatestVersion": current_version.sequence == latest_sequence,
            "cmis:isMajorVersion": True,
            "cmis:isLatestMajorVersion": current_version.sequence == latest_sequence,
            "cmis:versionSeriesId": f"cmis:version-series:{asset.id}",
            "cmis:versionLabel": str(current_version.sequence),
            "kontextual:versionId": current_version.version_id,
            "kontextual:versionChangeType": current_version.change_type.value,
        }

    def allowable_actions(
        self,
        context: OperationContext,
        *,
        has_content_stream: bool,
    ) -> tuple[CMISAction, ...]:
        """Return the candidate actions the access point allows for ``context``.

        ``GET_CONTENT_STREAM`` is left out without consulting the access point
        when the object has no content stream. Candidate order is preserved.
        """
        candidates = [
            CMISAction.GET_OBJECT,
            CMISAction.GET_CONTENT_STREAM,
            CMISAction.GET_RELATIONSHIPS,
            CMISAction.UPDATE_PROPERTIES,
            CMISAction.DELETE_OBJECT,
            CMISAction.SET_CONTENT_STREAM,
        ]
        actions: list[CMISAction] = []
        for action in candidates:
            if action == CMISAction.GET_CONTENT_STREAM and not has_content_stream:
                continue
            if self.access_point.decide_action(action, context).allowed:
                actions.append(action)
        return tuple(actions)
def _read_capabilities() -> tuple[CMISCapability, ...]:
return (
CMISCapability.REPOSITORY,
@@ -425,3 +710,76 @@ def _read_capabilities() -> tuple[CMISCapability, ...]:
def _enum_value(value: Any) -> Any:
    """Return ``value.value`` when that attribute exists, else ``value`` itself."""
    try:
        return value.value
    except AttributeError:
        return value
def _type_definition(
    base_type_id: CMISBaseType,
    type_id: str,
    display_name: str,
    can_write: bool,
) -> dict[str, Any]:
    """Build the type-definition envelope for one type.

    ``local_name`` is the part of ``type_id`` after the first colon (the whole
    id when there is no colon). Only document types are fileable, versionable
    and, when ``can_write`` is set, creatable.
    """
    is_document = base_type_id == CMISBaseType.DOCUMENT
    acl_controllable = base_type_id in {CMISBaseType.DOCUMENT, CMISBaseType.FOLDER}
    prefix, separator, remainder = type_id.partition(":")
    local_name = remainder if separator else prefix

    definition: dict[str, Any] = {}
    definition["id"] = type_id
    definition["local_name"] = local_name
    definition["display_name"] = display_name
    definition["base_type_id"] = base_type_id.value
    definition["queryable"] = True
    definition["controllable_acl"] = acl_controllable
    definition["controllable_policy"] = False
    definition["creatable"] = can_write and is_document
    definition["fileable"] = is_document
    definition["fulltext_indexed"] = False
    definition["included_in_supertype_query"] = True
    definition["versionable"] = is_document
    definition["property_definitions"] = _property_definitions(base_type_id)
    return definition
def _property_definitions(base_type_id: CMISBaseType) -> dict[str, dict[str, Any]]:
    """Return the property definitions advertised for a base type.

    Every type gets the four required CMIS identity properties plus the
    optional sensitivity and lifecycle properties; relationship types also get
    the required source and target ids.
    """

    def single(property_type: str, *, required: bool) -> dict[str, Any]:
        # Every advertised property is single-valued.
        return {"property_type": property_type, "cardinality": "single", "required": required}

    definitions = {
        "cmis:objectId": single("id", required=True),
        "cmis:name": single("string", required=True),
        "cmis:baseTypeId": single("id", required=True),
        "cmis:objectTypeId": single("id", required=True),
        "kontextual:sensitivity": single("string", required=False),
        "kontextual:lifecycle": single("string", required=False),
    }
    if base_type_id == CMISBaseType.RELATIONSHIP:
        definitions.update(
            {
                "cmis:sourceId": single("id", required=True),
                "cmis:targetId": single("id", required=True),
            }
        )
    return definitions
def _current_version(
    asset: KnowledgeAsset,
    versions: list[AssetVersion] | tuple[AssetVersion, ...],
) -> AssetVersion | None:
    """Pick the version that represents the asset's current state.

    The version whose id matches ``asset.current_version_id`` wins. If there
    is no such version, the one with the highest ``sequence`` is used, and
    ``None`` is returned when ``versions`` is empty.
    """
    wanted_id = asset.current_version_id
    if wanted_id:
        match = next((item for item in versions if item.version_id == wanted_id), None)
        if match is not None:
            return match
    if not versions:
        return None
    newest = None
    for candidate in versions:
        # ">=" keeps the last entry among equal sequences.
        if newest is None or candidate.sequence >= newest.sequence:
            newest = candidate
    return newest
def _preferred_representation(
    representations: list[AssetRepresentation] | tuple[AssetRepresentation, ...],
) -> AssetRepresentation | None:
    """Choose the representation to expose as the content stream.

    Source representations are preferred over normalized ones, which are
    preferred over derived ones; any other kind ranks last. The earliest entry
    wins among equally ranked candidates. Returns ``None`` for empty input.
    """
    if not representations:
        return None
    rank = {
        RepresentationKind.SOURCE: 0,
        RepresentationKind.NORMALIZED: 1,
        RepresentationKind.DERIVED: 2,
    }
    return min(representations, key=lambda candidate: rank.get(candidate.kind, 99))
def _normalize_path(path: str) -> str:
    """Return ``path`` as an absolute, forward-slash path.

    Backslashes are treated as separators, empty segments are dropped, and
    each remaining segment is cleaned with ``_safe_path_segment``. An input
    with no segments yields ``"/"``.
    """
    unified = path.replace("\\", "/")
    segments = []
    for raw in unified.split("/"):
        if raw:
            segments.append(_safe_path_segment(raw))
    return "/" + "/".join(segments)
def _safe_path_segment(value: str) -> str:
    """Return ``value`` as a single usable path segment.

    Surrounding whitespace and slashes are removed. ``"_"`` is returned when
    nothing usable remains: for ``None``, for empty or blank input, and for
    the dot segments ``"."`` and ``".."``, which must not reach a virtual
    path built from externally supplied source paths.

    Args:
        value: Raw segment; non-string values are converted with ``str``.

    Returns:
        The cleaned segment, or ``"_"`` as a placeholder.
    """
    if value is None:
        # str(None) would otherwise leak the literal segment "None".
        return "_"
    segment = str(value)
    while True:
        # Repeat until stable so whitespace uncovered by removing a slash
        # (and the other way round) is trimmed as well.
        trimmed = segment.strip().strip("/")
        if trimmed == segment:
            break
        segment = trimmed
    if segment in {"", ".", ".."}:
        return "_"
    return segment

View File

@@ -0,0 +1,178 @@
from __future__ import annotations
from kontextual_engine import (
Actor,
ActorType,
AssetRepresentation,
AssetVersion,
CMISAccessPoint,
CMISAccessProfile,
CMISAction,
CMISBaseType,
CMISDomainMapper,
Classification,
CoreRelationship,
KnowledgeAsset,
MetadataRecord,
OperationContext,
RelationshipTargetKind,
RepresentationKind,
SourceReference,
VersionChangeType,
)
def _context(actor_type: ActorType = ActorType.HUMAN) -> OperationContext:
    """Create an operation context for an actor of the given type."""
    actor = Actor.create(actor_type, actor_id=f"actor-{actor_type.value}")
    return OperationContext.create(actor, correlation_id="corr-cmis-map")
def _mapper(profile: CMISAccessProfile | None = None) -> CMISDomainMapper:
    """Create a mapper on a test access point; read-only browser profile by default."""
    effective_profile = profile or CMISAccessProfile.readonly_browser()
    access_point = CMISAccessPoint(
        access_point_id="cmis-test",
        repository_id="kontextual-test",
        profile=effective_profile,
        base_path="/cmis/test/browser",
        metadata={"repository_name": "Kontextual Test Repository"},
    )
    return CMISDomainMapper(access_point)
def _asset(sensitivity: str = "internal") -> KnowledgeAsset:
    """Create the SharePoint-sourced decision-record asset used by the tests."""
    classification = Classification(
        asset_type="document",
        sensitivity=sensitivity,
        topics=("architecture", "cmis"),
        owner="Platform Knowledge",
        review_state="approved",
    )
    origin = SourceReference(
        source_system="sharepoint",
        path="Architecture/ADR 0001.md",
        external_id="sp-adr-0001",
    )
    return KnowledgeAsset.create(
        "Decision Record",
        classification,
        asset_id="asset-decision-record",
        source_refs=[origin],
        metadata={"file_name": "ADR 0001.md", "source_system": "sharepoint"},
    )
def test_mapper_exposes_repository_info_capabilities_and_base_type_definitions() -> None:
    """Repository info, capability flags and all six base types are exposed."""
    mapper = _mapper()

    repository = mapper.repository_info()
    types = {}
    for definition in mapper.type_definitions():
        types[definition["base_type_id"]] = definition
    capabilities = repository["capabilities"]

    assert repository["repository_id"] == "kontextual-test"
    assert repository["repository_name"] == "Kontextual Test Repository"
    assert repository["cmis_version_supported"] == "1.1"
    assert repository["binding"] == "browser"
    assert capabilities["capability_query"] == "metadataonly"
    assert capabilities["capability_multifiling"] is False
    expected_base_types = {
        "cmis:document",
        "cmis:folder",
        "cmis:relationship",
        "cmis:policy",
        "cmis:item",
        "cmis:secondary",
    }
    assert set(types) == expected_base_types
    document_properties = types["cmis:document"]["property_definitions"]
    assert document_properties["cmis:objectId"]["required"] is True
def test_mapper_projects_asset_to_cmis_document_envelope() -> None:
    """A read-only profile projects an asset with stream, version and metadata."""
    mapper = _mapper()
    base_asset = _asset()
    source_repr = AssetRepresentation.from_content(
        base_asset.id,
        RepresentationKind.SOURCE,
        "text/markdown",
        "# Decision Record",
        storage_ref="memory://asset-decision-record/source",
        representation_id="repr-source",
    )
    current = AssetVersion(
        asset_id=base_asset.id,
        sequence=3,
        change_type=VersionChangeType.CONTENT_CHANGED,
        representation_ids=("repr-source",),
        version_id="ver-current",
    )
    versioned_asset = base_asset.with_current_version(current.version_id)

    projection = mapper.map_asset(
        versioned_asset,
        _context(),
        representations=[source_repr],
        versions=[current],
        relationship_ids=["cmis:relationship:rel-derived"],
        metadata_records=[MetadataRecord("status", "accepted", confirmed=True)],
    )

    assert projection is not None
    serialized = projection.to_dict()
    properties = serialized["properties"]
    advertised = serialized["allowable_actions"]
    assert serialized["object_id"] == "cmis:asset:asset-decision-record"
    assert serialized["base_type_id"] == CMISBaseType.DOCUMENT.value
    assert serialized["path"] == "/sources/sharepoint/Architecture/ADR 0001.md"
    assert properties["cmis:objectTypeId"] == "kontextual:document"
    assert properties["kontextual:metadata:status"] == "accepted"
    assert serialized["content_stream"]["mime_type"] == "text/markdown"
    assert serialized["version"]["cmis:versionLabel"] == "3"
    assert serialized["relationships"] == ["cmis:relationship:rel-derived"]
    # The read-only profile offers the stream but no property updates.
    assert CMISAction.GET_CONTENT_STREAM.value in advertised
    assert CMISAction.UPDATE_PROPERTIES.value not in advertised
def test_governed_authoring_projection_includes_write_allowable_actions() -> None:
    """The governed-authoring profile advertises the write actions."""
    mapper = _mapper(CMISAccessProfile.governed_authoring())
    asset = _asset()
    normalized = AssetRepresentation.from_content(
        asset.id,
        RepresentationKind.NORMALIZED,
        "application/json",
        "{}",
    )

    projection = mapper.map_asset(asset, _context(), representations=[normalized])

    assert projection is not None
    advertised = {action.value for action in projection.allowable_actions}
    required_writes = {
        CMISAction.UPDATE_PROPERTIES.value,
        CMISAction.DELETE_OBJECT.value,
        CMISAction.SET_CONTENT_STREAM.value,
    }
    assert required_writes <= advertised
def test_mapper_omits_assets_not_visible_through_profile() -> None:
    """A confidential asset is not projected through the read-only profile."""
    mapper = _mapper(CMISAccessProfile.readonly_browser())
    confidential = _asset("confidential")

    projection = mapper.map_asset(confidential, _context())

    assert projection is None
def test_mapper_projects_relationship_objects() -> None:
    """An asset-to-asset relationship becomes a CMIS relationship object."""
    mapper = _mapper()
    derived_from = CoreRelationship(
        source_id="asset-source",
        target_id="asset-target",
        predicate="derived_from",
        target_kind=RelationshipTargetKind.ASSET,
        confidence=0.91,
        relationship_id="rel-derived",
    )

    projection = mapper.map_relationship(derived_from, _context())

    assert projection is not None
    serialized = projection.to_dict()
    properties = serialized["properties"]
    assert serialized["object_id"] == "cmis:relationship:rel-derived"
    assert serialized["base_type_id"] == CMISBaseType.RELATIONSHIP.value
    assert properties["cmis:sourceId"] == "cmis:asset:asset-source"
    assert properties["cmis:targetId"] == "cmis:asset:asset-target"
    assert properties["kontextual:predicate"] == "derived_from"

View File

@@ -43,6 +43,7 @@ suite.
- `docs/cmis-profiled-access-points-implementation.md`
- `src/kontextual_engine/core/cmis.py`
- `tests/cmis/test_cmis_access_profiles.py`
- `tests/cmis/test_cmis_domain_mapper.py`
## Architecture Constraint
@@ -73,7 +74,7 @@ Acceptance:
```task
id: KONT-WP-0012-T002
status: todo
status: done
priority: high
state_hub_task_id: "a4c44471-22a9-40d9-9821-4b78e5ba9360"
```