CMIS domain mapper

This commit is contained in:
2026-05-07 00:57:12 +02:00
parent 241522e74d
commit 41da61896b
6 changed files with 571 additions and 3 deletions

View File

@@ -9,6 +9,9 @@ from .cmis import (
CMISAction,
CMISBinding,
CMISCapability,
CMISBaseType,
CMISDomainMapper,
CMISObjectProjection,
)
from .idempotency import IdempotencyRecord, IdempotencyStatus
from .ingestion import (
@@ -77,8 +80,11 @@ __all__ = [
"CMISAccessPoint",
"CMISAccessProfile",
"CMISAction",
"CMISBaseType",
"CMISBinding",
"CMISCapability",
"CMISDomainMapper",
"CMISObjectProjection",
"ConnectorCapability",
"ContextEntity",
"ContextEntityType",

View File

@@ -11,9 +11,11 @@ from enum import Enum
from typing import Any
from .actors import ActorType, OperationContext
from .assets import KnowledgeAsset
from .assets import AssetRepresentation, KnowledgeAsset, RepresentationKind
from .metadata import Sensitivity
from .policy import PolicyDecision
from .provenance import AssetVersion
from .relationships import CoreRelationship, RelationshipTargetKind
from .primitives import compact_dict
@@ -60,6 +62,15 @@ class CMISAction(str, Enum):
BULK_UPDATE_PROPERTIES = "bulk_update_properties"
class CMISBaseType(str, Enum):
    """Identifiers of the CMIS base object types used by projections.

    Members are ``str`` subclasses, so they compare equal to their raw
    ``cmis:*`` identifier and can be emitted directly in payloads.
    """

    # Content-bearing objects.
    DOCUMENT = "cmis:document"
    # Containers in the navigation hierarchy.
    FOLDER = "cmis:folder"
    # Directed links between two objects.
    RELATIONSHIP = "cmis:relationship"
    # Policy objects.
    POLICY = "cmis:policy"
    # Generic items.
    ITEM = "cmis:item"
    # Secondary (mix-in) types.
    SECONDARY = "cmis:secondary"
ACTION_CAPABILITIES: dict[CMISAction, CMISCapability] = {
CMISAction.GET_REPOSITORY_INFO: CMISCapability.REPOSITORY,
CMISAction.GET_TYPE_DEFINITION: CMISCapability.TYPE_DEFINITIONS,
@@ -408,6 +419,280 @@ class CMISAccessPoint:
)
@dataclass(frozen=True)
class CMISObjectProjection:
    """Immutable CMIS-shaped view of a single engine object."""

    # CMIS object id of the projected object.
    object_id: str
    # Base type the object is projected as.
    base_type_id: CMISBaseType
    # Concrete object-type id.
    type_id: str
    # Display name of the object.
    name: str
    # Property bag keyed by property id.
    properties: dict[str, Any]
    # Actions reported as allowed on this object.
    allowable_actions: tuple[CMISAction, ...] = ()
    # Optional path of the object.
    path: str | None = None
    # Optional description of the content stream.
    content_stream: dict[str, Any] | None = None
    # Optional versioning properties.
    version: dict[str, Any] | None = None
    # Ids of relationships attached to the object.
    relationships: tuple[str, ...] = ()

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form of the projection, passed through ``compact_dict``.

        Enum members are replaced by their ``value``; containers are shallow
        copies so the result does not alias this instance's fields. A missing
        content stream or version is handed to ``compact_dict`` as ``{}``.
        """
        stream_copy = dict(self.content_stream) if self.content_stream else {}
        version_copy = dict(self.version) if self.version else {}
        action_values = [entry.value for entry in self.allowable_actions]
        payload = {
            "object_id": self.object_id,
            "base_type_id": self.base_type_id.value,
            "type_id": self.type_id,
            "name": self.name,
            "path": self.path,
            "properties": dict(self.properties),
            "allowable_actions": action_values,
            "content_stream": stream_copy,
            "version": version_copy,
            "relationships": list(self.relationships),
        }
        return compact_dict(payload)
class CMISDomainMapper:
    """Project engine domain objects into CMIS-shaped envelopes.

    Every decision about exposure and permitted actions is delegated to the
    wrapped ``CMISAccessPoint``; the mapper itself only reshapes data.
    """

    def __init__(self, access_point: CMISAccessPoint) -> None:
        """Bind the mapper to the access point that decides what is exposed."""
        self.access_point = access_point

    def repository_info(self) -> dict[str, Any]:
        """Return the repository-info envelope for the bound access point.

        The repository name comes from the access point's ``repository_name``
        metadata entry and falls back to the repository id.
        """
        access_point = self.access_point
        profile = access_point.profile
        return {
            "repository_id": access_point.repository_id,
            "repository_name": access_point.metadata.get("repository_name", access_point.repository_id),
            "cmis_version_supported": "1.1",
            "root_folder_id": access_point.root_folder_id,
            "principal_anonymous": "anonymous",
            "principal_anyone": "anyone",
            "product_name": "kontextual-engine",
            "binding": profile.binding.value,
            "capabilities": self.capability_flags(),
            "profile": profile.name,
        }

    def capability_flags(self) -> dict[str, Any]:
        """Translate the profile's capabilities into CMIS capability values.

        Filing and private-working-copy features, as well as joins, are
        always reported as unsupported.
        """
        has = self.access_point.profile.has_capability
        can_navigate = has(CMISCapability.NAVIGATION)
        return {
            "capability_content_stream_updatability": (
                "anytime" if has(CMISCapability.CONTENT_STREAM_WRITE) else "none"
            ),
            "capability_changes": "objectidsonly" if has(CMISCapability.CHANGE_LOG) else "none",
            "capability_renditions": "read" if has(CMISCapability.RENDITIONS) else "none",
            "capability_get_descendants": can_navigate,
            "capability_get_folder_tree": can_navigate,
            "capability_multifiling": False,
            "capability_unfiling": False,
            "capability_version_specific_filing": False,
            "capability_pwc_searchable": False,
            "capability_pwc_updatable": False,
            "capability_all_versions_searchable": has(CMISCapability.VERSIONING),
            "capability_query": "metadataonly" if has(CMISCapability.DISCOVERY_QUERY) else "none",
            "capability_join": "none",
            "capability_acl": "discover" if has(CMISCapability.ACL) else "none",
        }

    def type_definitions(self) -> list[dict[str, Any]]:
        """Return one type definition per CMIS base type.

        The profile's ``allow_mutations`` flag is forwarded for the document
        and relationship types; all other types are passed ``False``.
        """
        can_write = self.access_point.profile.allow_mutations
        return [
            _type_definition(CMISBaseType.DOCUMENT, "kontextual:document", "Kontextual Document", can_write),
            _type_definition(CMISBaseType.FOLDER, "kontextual:folder", "Kontextual Folder", False),
            _type_definition(
                CMISBaseType.RELATIONSHIP,
                "kontextual:relationship",
                "Kontextual Relationship",
                can_write,
            ),
            _type_definition(CMISBaseType.POLICY, "kontextual:policy", "Kontextual Policy", False),
            _type_definition(CMISBaseType.ITEM, "kontextual:item", "Kontextual Item", False),
            _type_definition(CMISBaseType.SECONDARY, "kontextual:secondary", "Kontextual Secondary", False),
        ]

    def map_asset(
        self,
        asset: KnowledgeAsset,
        context: OperationContext,
        *,
        representations: list[AssetRepresentation] | tuple[AssetRepresentation, ...] = (),
        versions: list[AssetVersion] | tuple[AssetVersion, ...] = (),
        relationship_ids: list[str] | tuple[str, ...] = (),
        metadata_records: list[Any] | tuple[Any, ...] = (),
    ) -> CMISObjectProjection | None:
        """Project an asset as a CMIS document.

        Returns ``None`` when the access point does not expose the asset in
        the given context. The optional collections supply the content
        stream, versioning properties, relationship ids and extra metadata
        properties of the projection.
        """
        if not self.access_point.exposes_asset(asset, context):
            return None
        current_version = _current_version(asset, versions)
        content_stream = self.map_content_stream(asset, representations)
        return CMISObjectProjection(
            object_id=self.asset_object_id(asset.id),
            base_type_id=CMISBaseType.DOCUMENT,
            # NOTE(review): this id is derived from the asset type and may not
            # match any id returned by type_definitions() - confirm intent.
            type_id=f"kontextual:{asset.classification.asset_type}",
            name=asset.title,
            path=self.asset_path(asset),
            properties=self.asset_properties(asset, metadata_records=metadata_records),
            allowable_actions=self.allowable_actions(context, has_content_stream=content_stream is not None),
            content_stream=content_stream,
            version=self.version_properties(asset, current_version, versions),
            relationships=tuple(relationship_ids),
        )

    def map_relationship(
        self,
        relationship: CoreRelationship,
        context: OperationContext,
    ) -> CMISObjectProjection | None:
        """Project a relationship as a CMIS relationship object.

        Returns ``None`` when the access point denies ``GET_RELATIONSHIPS``
        for this relationship. Targets that are not assets receive a
        ``cmis:entity:`` id instead of an asset object id.
        """
        decision = self.access_point.decide_action(
            CMISAction.GET_RELATIONSHIPS,
            context,
            resource=f"relationship:{relationship.relationship_id}",
        )
        if not decision.allowed:
            return None
        object_id = f"cmis:relationship:{relationship.relationship_id}"
        source_id = self.asset_object_id(relationship.source_id)
        if relationship.target_kind == RelationshipTargetKind.ASSET:
            target_id = self.asset_object_id(relationship.target_id)
        else:
            target_id = f"cmis:entity:{relationship.target_id}"
        return CMISObjectProjection(
            object_id=object_id,
            base_type_id=CMISBaseType.RELATIONSHIP,
            type_id="kontextual:relationship",
            name=relationship.predicate,
            properties={
                "cmis:objectId": object_id,
                "cmis:name": relationship.predicate,
                "cmis:baseTypeId": CMISBaseType.RELATIONSHIP.value,
                "cmis:objectTypeId": "kontextual:relationship",
                "cmis:sourceId": source_id,
                "cmis:targetId": target_id,
                "kontextual:predicate": relationship.predicate,
                "kontextual:confidence": relationship.confidence,
                "kontextual:targetKind": relationship.target_kind.value,
            },
            allowable_actions=(CMISAction.GET_OBJECT, CMISAction.GET_RELATIONSHIPS),
        )

    def asset_object_id(self, asset_id: str) -> str:
        """Return the CMIS object id used for the asset with ``asset_id``."""
        return f"cmis:asset:{asset_id}"

    def asset_path(self, asset: KnowledgeAsset) -> str:
        """Derive the path under which an asset is presented.

        Precedence: an explicit ``cmis_path`` metadata entry; the first
        source reference (its path, else its external id) under
        ``/sources/<source_system>``; the first topic under ``/topics``;
        finally ``/assets/<asset_type>/<asset id>``.
        """
        explicit = asset.metadata.get("cmis_path")
        if explicit:
            return _normalize_path(str(explicit))
        if asset.source_refs:
            # Only the first source reference contributes to the path.
            source_ref = asset.source_refs[0]
            source_root = _safe_path_segment(source_ref.source_system)
            if source_ref.path:
                return _normalize_path(f"/sources/{source_root}/{source_ref.path}")
            if source_ref.external_id:
                return _normalize_path(f"/sources/{source_root}/{source_ref.external_id}")
        topics = asset.classification.topics
        if topics:
            return _normalize_path(f"/topics/{topics[0]}/{asset.id}")
        return _normalize_path(f"/assets/{asset.classification.asset_type}/{asset.id}")

    def asset_properties(
        self,
        asset: KnowledgeAsset,
        *,
        metadata_records: list[Any] | tuple[Any, ...] = (),
    ) -> dict[str, Any]:
        """Build the property bag for an asset projection.

        Each metadata record with a truthy ``key`` attribute adds a
        ``kontextual:metadata:<key>`` property carrying its ``value``
        attribute. The result is passed through ``compact_dict``.
        """
        classification = asset.classification
        properties = {
            "cmis:objectId": self.asset_object_id(asset.id),
            "cmis:name": asset.title,
            "cmis:baseTypeId": CMISBaseType.DOCUMENT.value,
            "cmis:objectTypeId": f"kontextual:{classification.asset_type}",
            "cmis:createdBy": asset.metadata.get("created_by"),
            "cmis:lastModifiedBy": asset.metadata.get("updated_by"),
            "cmis:creationDate": asset.created_at,
            "cmis:lastModificationDate": asset.updated_at,
            "cmis:changeToken": asset.current_version_id,
            "kontextual:assetId": asset.id,
            "kontextual:assetType": classification.asset_type,
            "kontextual:sensitivity": _enum_value(classification.sensitivity),
            "kontextual:lifecycle": _enum_value(asset.lifecycle),
            "kontextual:owner": classification.owner,
            "kontextual:topics": list(classification.topics),
            "kontextual:reviewState": classification.review_state,
        }
        for record in metadata_records:
            # Records without a usable key are skipped rather than rejected.
            key = getattr(record, "key", None)
            if key:
                properties[f"kontextual:metadata:{key}"] = getattr(record, "value", None)
        return compact_dict(properties)

    def map_content_stream(
        self,
        asset: KnowledgeAsset,
        representations: list[AssetRepresentation] | tuple[AssetRepresentation, ...],
    ) -> dict[str, Any] | None:
        """Describe the preferred representation as a content stream.

        Returns ``None`` when there is no representation to choose from. The
        file name is the asset's ``file_name`` metadata entry, falling back
        to the asset title.
        """
        representation = _preferred_representation(representations)
        if representation is None:
            return None
        return compact_dict(
            {
                "stream_id": representation.representation_id,
                "file_name": asset.metadata.get("file_name", asset.title),
                "mime_type": representation.media_type,
                "length": representation.size_bytes,
                "digest": representation.digest,
                "storage_ref": representation.storage_ref,
                "kind": representation.kind.value,
            }
        )

    def version_properties(
        self,
        asset: KnowledgeAsset,
        current_version: AssetVersion | None,
        versions: list[AssetVersion] | tuple[AssetVersion, ...],
    ) -> dict[str, Any]:
        """Build the CMIS versioning properties for an asset.

        Without a current version the asset is reported as a single latest
        major version labelled ``"1"``. Otherwise the current version counts
        as latest when no entry in ``versions`` has a higher sequence; every
        version is reported as a major version.
        """
        series_id = f"cmis:version-series:{asset.id}"
        if current_version is None:
            return {
                "cmis:isLatestVersion": True,
                "cmis:isMajorVersion": True,
                "cmis:isLatestMajorVersion": True,
                "cmis:versionSeriesId": series_id,
                "cmis:versionLabel": "1",
            }
        # Compare against the current version itself rather than against the
        # maximum of ``versions`` alone: a partial history that omits the
        # current version must not make it look superseded.
        is_latest = all(version.sequence <= current_version.sequence for version in versions)
        return {
            "cmis:isLatestVersion": is_latest,
            "cmis:isMajorVersion": True,
            "cmis:isLatestMajorVersion": is_latest,
            "cmis:versionSeriesId": series_id,
            "cmis:versionLabel": str(current_version.sequence),
            "kontextual:versionId": current_version.version_id,
            "kontextual:versionChangeType": current_version.change_type.value,
        }

    def allowable_actions(
        self,
        context: OperationContext,
        *,
        has_content_stream: bool,
    ) -> tuple[CMISAction, ...]:
        """Return the candidate actions the access point allows in ``context``.

        ``GET_CONTENT_STREAM`` is dropped without consulting the access point
        when the object has no content stream. The result keeps the
        candidates' order.
        """
        candidates = (
            CMISAction.GET_OBJECT,
            CMISAction.GET_CONTENT_STREAM,
            CMISAction.GET_RELATIONSHIPS,
            CMISAction.UPDATE_PROPERTIES,
            CMISAction.DELETE_OBJECT,
            CMISAction.SET_CONTENT_STREAM,
        )
        decide = self.access_point.decide_action
        return tuple(
            action
            for action in candidates
            if (has_content_stream or action != CMISAction.GET_CONTENT_STREAM)
            and decide(action, context).allowed
        )
def _read_capabilities() -> tuple[CMISCapability, ...]:
return (
CMISCapability.REPOSITORY,
@@ -425,3 +710,76 @@ def _read_capabilities() -> tuple[CMISCapability, ...]:
def _enum_value(value: Any) -> Any:
return getattr(value, "value", value)
def _type_definition(
    base_type_id: CMISBaseType,
    type_id: str,
    display_name: str,
    can_write: bool,
) -> dict[str, Any]:
    """Build the type-definition envelope for one object type.

    Args:
        base_type_id: Base type the definition belongs to; drives the
            per-type attribute values.
        type_id: Id of the type; the part after the first ``:`` becomes the
            local name.
        display_name: Human-readable name of the type.
        can_write: Whether mutations are allowed. Only document types become
            creatable; the flag has no effect for other base types.

    Returns:
        The type attributes together with the property definitions for the
        base type.
    """
    is_document = base_type_id == CMISBaseType.DOCUMENT
    # CMIS 1.1 requires the folder base type to be fileable, so folders are
    # reported as fileable alongside documents.
    is_fileable = base_type_id in {CMISBaseType.DOCUMENT, CMISBaseType.FOLDER}
    return {
        "id": type_id,
        "local_name": type_id.split(":", 1)[-1],
        "display_name": display_name,
        "base_type_id": base_type_id.value,
        "queryable": True,
        "controllable_acl": base_type_id in {CMISBaseType.DOCUMENT, CMISBaseType.FOLDER},
        "controllable_policy": False,
        "creatable": can_write and is_document,
        "fileable": is_fileable,
        "fulltext_indexed": False,
        "included_in_supertype_query": True,
        "versionable": is_document,
        "property_definitions": _property_definitions(base_type_id),
    }
def _property_definitions(base_type_id: CMISBaseType) -> dict[str, dict[str, Any]]:
    """Return the property definitions advertised for a base type.

    All types share the four core ``cmis:*`` properties plus the optional
    sensitivity and lifecycle properties; relationship types additionally
    require source and target ids.
    """

    def single(property_type: str, required: bool) -> dict[str, Any]:
        # Every advertised property is single-valued.
        return {"property_type": property_type, "cardinality": "single", "required": required}

    catalogue = {
        "cmis:objectId": single("id", True),
        "cmis:name": single("string", True),
        "cmis:baseTypeId": single("id", True),
        "cmis:objectTypeId": single("id", True),
        "kontextual:sensitivity": single("string", False),
        "kontextual:lifecycle": single("string", False),
    }
    if base_type_id == CMISBaseType.RELATIONSHIP:
        for endpoint in ("cmis:sourceId", "cmis:targetId"):
            catalogue[endpoint] = single("id", True)
    return catalogue
def _current_version(
asset: KnowledgeAsset,
versions: list[AssetVersion] | tuple[AssetVersion, ...],
) -> AssetVersion | None:
if asset.current_version_id:
for version in versions:
if version.version_id == asset.current_version_id:
return version
if versions:
return sorted(versions, key=lambda version: version.sequence)[-1]
return None
def _preferred_representation(
representations: list[AssetRepresentation] | tuple[AssetRepresentation, ...],
) -> AssetRepresentation | None:
if not representations:
return None
priority = {
RepresentationKind.SOURCE: 0,
RepresentationKind.NORMALIZED: 1,
RepresentationKind.DERIVED: 2,
}
return sorted(representations, key=lambda item: priority.get(item.kind, 99))[0]
def _normalize_path(path: str) -> str:
    """Return ``path`` as an absolute, forward-slash path.

    Backslashes are treated as separators, empty segments are dropped and
    each remaining segment is cleaned by ``_safe_path_segment``. An input
    without any segment yields ``"/"``.
    """
    unified = path.replace("\\", "/")
    segments = []
    for raw_segment in unified.split("/"):
        if raw_segment:
            segments.append(_safe_path_segment(raw_segment))
    return "/" + "/".join(segments)
def _safe_path_segment(value: str) -> str:
return str(value).strip().strip("/") or "_"