query parsing and diagnostics

This commit is contained in:
2026-05-14 02:20:17 +02:00
parent a152968466
commit e5197e15e2
13 changed files with 777 additions and 90 deletions

View File

@@ -7,6 +7,7 @@ requests into service/runtime contracts and must not own domain behavior.
from __future__ import annotations
import json
import re
from dataclasses import dataclass, field, replace
from datetime import datetime
from email import policy
@@ -90,6 +91,43 @@ from kontextual_engine.services import (
API_VERSION = "v1"
OPENAPI_VERSION = "1.0.0"
CMIS_APPEND_MAX_COMPOSED_BYTES = 64 * 1024 * 1024
# Human-readable templates of the accepted query shapes. The list is echoed
# verbatim under "supported" in the details of every unsupported-query error
# built by _unsupported_cmis_query.
CMIS_QUERY_SUPPORTED = [
    "SELECT * FROM cmis:document",
    "SELECT * FROM kontextual:document",
    "SELECT * FROM cmis:document WHERE <filterable-field> = '<value>' [AND ...]",
    "SELECT * FROM cmis:document WHERE <text-field> LIKE '<pattern>' [AND ...]",
    "SELECT * FROM cmis:document WHERE <multi-field> IN ('<value>', ...) [AND ...]",
    "SELECT * FROM cmis:document ... ORDER BY <orderable-field> [ASC|DESC]",
]
# Property ids accepted on the left-hand side of a WHERE predicate
# (checked, case-sensitively, by _validate_cmis_query_field).
CMIS_QUERY_FILTERABLE_FIELDS = {
    "cmis:objectId",
    "cmis:name",
    "cmis:objectTypeId",
    "cmis:baseTypeId",
    "cmis:description",
    "kontextual:assetId",
    "kontextual:assetType",
    "kontextual:sensitivity",
    "kontextual:lifecycle",
    "kontextual:owner",
    "kontextual:topics",
    "kontextual:reviewState",
}
# Property ids accepted after ORDER BY (checked by _parse_cmis_query).
CMIS_QUERY_ORDERABLE_FIELDS = {
    "cmis:objectId",
    "cmis:name",
    "cmis:creationDate",
    "cmis:lastModificationDate",
}
# Subset of the filterable fields that may be used with the LIKE operator
# (checked by _parse_cmis_query_conditions).
CMIS_QUERY_LIKE_FIELDS = {
    "cmis:name",
    "cmis:description",
    "kontextual:assetId",
    "kontextual:assetType",
    "kontextual:owner",
    "kontextual:topics",
    "kontextual:reviewState",
}
AGENT_OPERATION_CATALOG: tuple[dict[str, Any], ...] = (
@@ -1593,19 +1631,13 @@ class ServiceRuntime:
decision = mapper.access_point.decide_action(CMISAction.QUERY, context)
if not decision.allowed:
raise _cmis_authorization_error(decision, "query")
normalized = query.strip().lower()
if normalized not in {"select * from cmis:document", "select * from kontextual:document"}:
raise ValidationError(
"Unsupported CMIS query subset",
details={
"query": query,
"supported": ["SELECT * FROM cmis:document", "SELECT * FROM kontextual:document"],
},
)
query_spec = _parse_cmis_query(query)
projections = self._cmis_document_projections(mapper, context)
projections = _apply_cmis_query_spec(projections, query_spec)
paged = projections[max(skip_count, 0) : max(skip_count, 0) + max(max_items, 0)]
return {
"query": query,
"query_spec": query_spec,
"results": paged,
"num_items": len(paged),
"has_more_items": len(projections) > max(skip_count, 0) + len(paged),
@@ -1618,19 +1650,55 @@ class ServiceRuntime:
context: OperationContext,
*,
object_id: str | None = None,
target_id: str | None = None,
relationship_direction: str | None = None,
) -> dict[str, Any]:
mapper = self._cmis_mapper(access_point_id)
decision = mapper.access_point.decide_action(CMISAction.GET_RELATIONSHIPS, context)
if not decision.allowed:
raise _cmis_authorization_error(decision, "getRelationships")
source_id = _cmis_asset_id(object_id) if object_id else None
direction = (relationship_direction or "source").strip().lower()
if direction not in {"source", "target", "either"}:
raise ValidationError(
"Unsupported CMIS relationship direction",
details={
"code": "cmis.relationship_direction_unsupported",
"operation": "getObjectRelationships",
"direction": relationship_direction,
"supported": ["source", "target", "either"],
},
)
source_filter = _cmis_asset_id(object_id) if object_id and direction == "source" else None
target_filter = _cmis_asset_id(object_id) if object_id and direction == "target" else None
if target_id:
target_filter = _cmis_asset_id(target_id)
relationships = self.repository.list_relationships(source_id=source_filter, target_id=target_filter)
if object_id and direction == "either":
asset_id = _cmis_asset_id(object_id)
relationships = [
relationship
for relationship in relationships
if relationship.source_id == asset_id
or (
relationship.target_kind == RelationshipTargetKind.ASSET
and relationship.target_id == asset_id
)
]
projections = [
projection.to_dict()
for relationship in self.repository.list_relationships(source_id=source_id)
for relationship in relationships
if self._cmis_relationship_visible(mapper, relationship, context)
if (projection := mapper.map_relationship(relationship, context))
]
return {"items": projections, "count": len(projections)}
return {
"items": projections,
"count": len(projections),
"filters": {
"object_id": object_id,
"target_id": target_id,
"relationship_direction": direction,
},
}
def cmis_change_log(
self,
@@ -3629,26 +3697,38 @@ def create_app(runtime: ServiceRuntime | None = None):
)
def unsupported_browser_selector(selector: str | None) -> dict[str, Any]:
unsupported_details: dict[str, Any] = {
"cmisselector": selector,
"supported": [
"repositoryInfo",
"typeChildren",
"typeDescendants",
"typeDefinition",
"query",
"object",
"children",
"parent",
"parents",
"properties",
"allowableActions",
"policies",
"content",
],
}
if selector in {"descendants", "folderTree"}:
unsupported_details.update(
{
"code": "cmis.not_supported",
"cmis_exception": "notSupported",
"unsupported_feature": "get_descendants"
if selector == "descendants"
else "get_folder_tree",
"release_contract": "Navigation tree selectors remain unsupported for the first release.",
}
)
raise ValidationError(
"Unsupported CMIS Browser Binding selector",
details={
"cmisselector": selector,
"supported": [
"repositoryInfo",
"typeChildren",
"typeDescendants",
"typeDefinition",
"query",
"object",
"children",
"parent",
"parents",
"properties",
"allowableActions",
"policies",
"content",
],
},
details=unsupported_details,
)
async def browser_action_payload(request: Request) -> dict[str, Any]:
@@ -4230,9 +4310,18 @@ def create_app(runtime: ServiceRuntime | None = None):
def cmis_relationships(
access_point_id: str,
object_id: str | None = Query(None),
target_id: str | None = Query(None, alias="targetId"),
relationship_direction: str | None = Query(None, alias="relationshipDirection"),
context: OperationContext = Depends(context_from_headers),
) -> dict[str, Any]:
return response(runtime.cmis_relationships, access_point_id, context, object_id=object_id)
return response(
runtime.cmis_relationships,
access_point_id,
context,
object_id=object_id,
target_id=target_id,
relationship_direction=relationship_direction,
)
@app.get("/cmis/{access_point_id}/browser/changes", tags=["cmis"])
def cmis_changes(
@@ -4702,6 +4791,212 @@ def _normalize_cmis_path(path: str) -> str:
return "/" + "/".join(parts)
_CMIS_QUERY_RE = re.compile(
r"^\s*SELECT\s+(?P<select>\*|[A-Za-z0-9_:\s,]+)\s+FROM\s+(?P<from>[A-Za-z0-9_:]+)"
r"(?:\s+WHERE\s+(?P<where>.*?))?"
r"(?:\s+ORDER\s+BY\s+(?P<order>[A-Za-z0-9_:]+)(?:\s+(?P<direction>ASC|DESC))?)?\s*$",
re.IGNORECASE,
)
_CMIS_QUERY_CONDITION_RE = re.compile(
r"^(?P<field>[A-Za-z0-9_:]+)\s*(?P<op>=|LIKE)\s*(?P<value>'.*?'|\".*?\"|[^\s]+)\s*$",
re.IGNORECASE,
)
_CMIS_QUERY_IN_RE = re.compile(
r"^(?P<field>[A-Za-z0-9_:]+)\s+IN\s*\((?P<values>.*)\)\s*$",
re.IGNORECASE,
)
def _parse_cmis_query(query: str) -> dict[str, Any]:
    """Parse a CMIS query string into a query-spec dictionary.

    Only ``SELECT *`` against ``cmis:document`` / ``kontextual:document`` is
    accepted, optionally followed by a WHERE clause and a single-field
    ORDER BY clause.

    Returns:
        A dict with the keys ``type_id``, ``conditions``, ``order_by`` (``None``
        when absent) and ``direction`` (``"ASC"`` unless stated otherwise).

    Raises:
        ValidationError: built by ``_unsupported_cmis_query`` whenever the
            query falls outside the supported subset.
    """
    parsed = _CMIS_QUERY_RE.match(query)
    if parsed is None:
        raise _unsupported_cmis_query(query, "Only a bounded SELECT/FROM/WHERE/ORDER BY subset is supported.")

    # Collapse whitespace runs so that a padded "*" is still recognised.
    projection = " ".join(parsed.group("select").split())
    if projection != "*":
        raise _unsupported_cmis_query(query, "Only SELECT * is supported in the release-stable subset.")

    queried_type = parsed.group("from")
    queryable_types = {CMISBaseType.DOCUMENT.value, "kontextual:document"}
    if queried_type not in queryable_types:
        raise _unsupported_cmis_query(query, "Only cmis:document and kontextual:document are queryable.")

    # Predicates are validated before the ORDER BY field, so a query with both
    # problems reports the WHERE problem first.
    predicates = _parse_cmis_query_conditions(query, parsed.group("where"))

    sort_field = parsed.group("order")
    stated_direction = parsed.group("direction")
    sort_direction = stated_direction.upper() if stated_direction else "ASC"
    if sort_field and sort_field not in CMIS_QUERY_ORDERABLE_FIELDS:
        raise _unsupported_cmis_query(
            query,
            "ORDER BY is supported only for common CMIS document fields.",
            field=sort_field,
        )

    return {
        "type_id": queried_type,
        "conditions": predicates,
        "order_by": sort_field,
        "direction": sort_direction,
    }
def _parse_cmis_query_conditions(query: str, where_clause: str | None) -> list[dict[str, Any]]:
if not where_clause:
return []
conditions: list[dict[str, Any]] = []
for raw_condition in re.split(r"\s+AND\s+", where_clause, flags=re.IGNORECASE):
condition = raw_condition.strip()
if not condition:
raise _unsupported_cmis_query(query, "Empty WHERE predicates are not supported.")
in_match = _CMIS_QUERY_IN_RE.match(condition)
if in_match:
field = in_match.group("field")
_validate_cmis_query_field(query, field)
conditions.append(
{
"field": field,
"operator": "IN",
"values": _cmis_query_literal_list(query, in_match.group("values")),
}
)
continue
if re.search(r"\bOR\b|\(|\)", condition, re.IGNORECASE):
raise _unsupported_cmis_query(
query,
"Only AND-combined simple predicates are supported.",
predicate=condition,
)
match = _CMIS_QUERY_CONDITION_RE.match(condition)
if not match:
raise _unsupported_cmis_query(query, "Unsupported CMIS query predicate.", predicate=condition)
field = match.group("field")
operator = match.group("op").upper()
_validate_cmis_query_field(query, field)
if operator == "LIKE" and field not in CMIS_QUERY_LIKE_FIELDS:
raise _unsupported_cmis_query(
query,
"LIKE is supported only for text-like CMIS release fields.",
field=field,
)
conditions.append(
{
"field": field,
"operator": operator,
"value": _cmis_query_unquote(match.group("value")),
}
)
return conditions
def _validate_cmis_query_field(query: str, field: str) -> None:
    """Ensure ``field`` may be used in a WHERE predicate.

    Raises:
        ValidationError: built by ``_unsupported_cmis_query`` when ``field`` is
            not a member of ``CMIS_QUERY_FILTERABLE_FIELDS``.
    """
    if field in CMIS_QUERY_FILTERABLE_FIELDS:
        return
    raise _unsupported_cmis_query(
        query,
        "WHERE predicates are supported only for release-stable filterable fields.",
        field=field,
    )
def _apply_cmis_query_spec(projections: list[dict[str, Any]], query_spec: dict[str, Any]) -> list[dict[str, Any]]:
filtered = [
projection
for projection in projections
if all(_cmis_query_condition_matches(projection, condition) for condition in query_spec["conditions"])
]
order_by = query_spec.get("order_by")
if not order_by:
return filtered
populated = [projection for projection in filtered if _cmis_query_values(projection, order_by)]
empty = [projection for projection in filtered if not _cmis_query_values(projection, order_by)]
return sorted(
populated,
key=lambda projection: _cmis_query_sort_key(projection, order_by),
reverse=query_spec.get("direction") == "DESC",
) + empty
def _cmis_query_condition_matches(projection: dict[str, Any], condition: dict[str, Any]) -> bool:
    """Tell whether ``projection`` satisfies a single parsed condition.

    All values are compared as strings. A multi-valued field matches when any
    one of its values matches; a projection with no value for the field never
    matches.
    """
    candidates = [str(item) for item in _cmis_query_values(projection, condition["field"])]
    if not candidates:
        return False

    operator = condition["operator"]
    if operator == "IN":
        allowed = {str(item) for item in condition["values"]}
        return not allowed.isdisjoint(candidates)

    target = str(condition["value"])
    if operator == "LIKE":
        return any(_cmis_query_like_supported(candidate, target) for candidate in candidates)
    # Any remaining operator is treated as plain equality.
    return target in candidates
def _cmis_query_values(projection: dict[str, Any], field: str) -> list[Any]:
if field == "cmis:objectId":
value = projection.get("object_id")
elif field == "cmis:name":
value = projection.get("name")
elif field == "cmis:baseTypeId":
value = projection.get("base_type_id")
elif field == "cmis:objectTypeId":
value = projection.get("type_id")
else:
value = dict(projection.get("properties", {})).get(field)
if value is None:
return []
if isinstance(value, (list, tuple, set)):
return [item for item in value if item is not None]
return [value]
def _cmis_query_sort_key(projection: dict[str, Any], field: str) -> tuple[str, str]:
    """Build the ORDER BY sort key of a projection for ``field``.

    The key is ``(type name, lower-cased string form)`` of the first value, so
    values of different Python types are grouped rather than compared with
    each other. Projections without a value get ``("", "")``.
    """
    candidates = _cmis_query_values(projection, field)
    if candidates:
        primary = candidates[0]
        return (type(primary).__name__, str(primary).lower())
    return ("", "")
def _cmis_query_like_supported(value: str, pattern: str) -> bool:
expression = "".join(".*" if char == "%" else "." if char == "_" else re.escape(char) for char in pattern)
return re.match(f"^{expression}$", value, flags=re.IGNORECASE) is not None
def _cmis_query_literal_list(query: str, value: str) -> list[str]:
values: list[str] = []
current: list[str] = []
quote: str | None = None
for char in value:
if char in {"'", '"'}:
if quote == char:
quote = None
elif quote is None:
quote = char
current.append(char)
elif char == "," and quote is None:
literal = "".join(current).strip()
if literal:
values.append(_cmis_query_unquote(literal))
current = []
else:
current.append(char)
if quote is not None:
raise _unsupported_cmis_query(query, "Unclosed quoted literal in IN predicate.")
literal = "".join(current).strip()
if literal:
values.append(_cmis_query_unquote(literal))
return values
def _cmis_query_unquote(value: str) -> str:
stripped = value.strip()
if len(stripped) >= 2 and stripped[0] == stripped[-1] and stripped[0] in {"'", '"'}:
return stripped[1:-1]
return stripped
def _unsupported_cmis_query(query: str, reason: str, **details: Any) -> ValidationError:
    """Build (without raising) the error for a query outside the supported subset.

    The error details carry the offending query, the reason, and the supported
    query shapes and field lists. Extra keyword arguments are merged in last
    and therefore override any standard key of the same name.
    """
    payload: dict[str, Any] = {
        "code": "cmis.not_supported",
        "cmis_exception": "notSupported",
        "query": query,
        "reason": reason,
        "supported": CMIS_QUERY_SUPPORTED,
        "filterable_fields": sorted(CMIS_QUERY_FILTERABLE_FIELDS),
        "orderable_fields": sorted(CMIS_QUERY_ORDERABLE_FIELDS),
    }
    payload.update(details)
    return ValidationError("Unsupported CMIS query subset", details=payload)
def _cmis_media_type(value: Any) -> str:
media_type = str(value or "application/octet-stream").split(";", 1)[0].strip()
return media_type or "application/octet-stream"

View File

@@ -16,7 +16,7 @@ from .metadata import LifecycleState, Sensitivity
from .policy import PolicyDecision
from .provenance import AssetVersion
from .relationships import CoreRelationship, RelationshipTargetKind
from .primitives import compact_dict
from .primitives import compact_dict, stable_json_dumps
class CMISBinding(str, Enum):
@@ -190,7 +190,11 @@ UNSUPPORTED_FEATURES: dict[str, dict[str, Any]] = {
"reason": "query_not_supported",
"standard_flag": "capability_join",
},
"order_by": {"status": "unsupported", "reason": "query_not_supported", "standard_flag": "capability_order_by"},
"custom_order_by": {
"status": "unsupported",
"reason": "custom_order_by_not_supported",
"standard_flag": "capability_order_by",
},
"apply_acl": {"status": "unsupported", "reason": "operation_not_implemented", "standard_flag": "capability_acl"},
"apply_policy": {"status": "unsupported", "reason": "capability_not_supported"},
"remove_policy": {"status": "unsupported", "reason": "capability_not_supported"},
@@ -636,7 +640,7 @@ class CMISDomainMapper:
"capability_renditions": "none",
"capability_get_descendants": False,
"capability_get_folder_tree": False,
"capability_order_by": "none",
"capability_order_by": "common",
"capability_multifiling": False,
"capability_unfiling": False,
"capability_version_specific_filing": False,
@@ -747,11 +751,19 @@ class CMISDomainMapper:
"cmis:name": relationship.predicate,
"cmis:baseTypeId": CMISBaseType.RELATIONSHIP.value,
"cmis:objectTypeId": "kontextual:relationship",
"cmis:changeToken": f"relationship:{relationship.relationship_id}:{relationship.created_at}",
"cmis:sourceId": source_id,
"cmis:targetId": target_id,
"kontextual:relationshipId": relationship.relationship_id,
"kontextual:predicate": relationship.predicate,
"kontextual:confidence": relationship.confidence,
"kontextual:targetKind": relationship.target_kind.value,
"kontextual:direction": relationship.direction,
"kontextual:validFrom": relationship.valid_from,
"kontextual:validTo": relationship.valid_to,
"kontextual:actorId": relationship.actor_id,
"kontextual:createdAt": relationship.created_at,
"kontextual:provenance": stable_json_dumps(relationship.provenance),
},
allowable_actions=(CMISAction.GET_OBJECT, CMISAction.GET_RELATIONSHIPS),
)
@@ -766,16 +778,22 @@ class CMISDomainMapper:
entries = [
{
"principal_id": context.actor.id,
"principal_kind": context.actor.actor_type.value,
"permissions": permissions,
"direct": True,
"inherited": False,
"source": "request-actor-profile",
}
]
if asset.classification.sensitivity == Sensitivity.PUBLIC:
entries.append(
{
"principal_id": "anyone",
"principal_kind": "well_known",
"permissions": ["cmis:read"],
"direct": False,
"inherited": True,
"source": "public-sensitivity",
}
)
return {
@@ -783,6 +801,13 @@ class CMISDomainMapper:
"is_exact": True,
"aces": entries,
"derived_from": "kontextual-profile-policy",
"visibility_reason": visibility.reason,
"permission_mapping": {
"cmis:read": "asset visible through profile policy",
"cmis:write": "profile allows governed mutations",
"cmis:delete": "profile allows governed delete requests",
},
"policy_authority": "kontextual-policy-gateway",
"profile": self.access_point.profile.name,
}
@@ -1500,8 +1525,8 @@ def _browser_type_display_name(type_definition: dict[str, Any]) -> str:
def _browser_standard_property_definitions(base_id: str) -> dict[str, dict[str, Any]]:
common = {
"cmis:objectId": _browser_propdef("id", required=False, updatability="readonly"),
"cmis:name": _browser_propdef("string", required=True, updatability="readwrite"),
"cmis:objectId": _browser_propdef("id", required=False, updatability="readonly", orderable=True),
"cmis:name": _browser_propdef("string", required=True, updatability="readwrite", orderable=True),
"cmis:baseTypeId": _browser_propdef("id", required=False, updatability="readonly"),
"cmis:objectTypeId": _browser_propdef("id", required=True, updatability="oncreate"),
"cmis:createdBy": _browser_propdef(
@@ -1713,14 +1738,34 @@ def _type_definition(
def _property_definitions(base_type_id: CMISBaseType) -> dict[str, dict[str, Any]]:
definitions = {
"cmis:objectId": {"property_type": "id", "cardinality": "single", "required": True},
"cmis:name": {"property_type": "string", "cardinality": "single", "required": True},
"cmis:objectId": {
"property_type": "id",
"cardinality": "single",
"required": True,
"orderable": True,
},
"cmis:name": {
"property_type": "string",
"cardinality": "single",
"required": True,
"orderable": True,
},
"cmis:baseTypeId": {"property_type": "id", "cardinality": "single", "required": True},
"cmis:objectTypeId": {"property_type": "id", "cardinality": "single", "required": True},
"cmis:createdBy": {"property_type": "string", "cardinality": "single", "required": False},
"cmis:lastModifiedBy": {"property_type": "string", "cardinality": "single", "required": False},
"cmis:creationDate": {"property_type": "datetime", "cardinality": "single", "required": False},
"cmis:lastModificationDate": {"property_type": "datetime", "cardinality": "single", "required": False},
"cmis:creationDate": {
"property_type": "datetime",
"cardinality": "single",
"required": False,
"orderable": True,
},
"cmis:lastModificationDate": {
"property_type": "datetime",
"cardinality": "single",
"required": False,
"orderable": True,
},
"cmis:changeToken": {"property_type": "string", "cardinality": "single", "required": False},
"cmis:secondaryObjectTypeIds": {"property_type": "id", "cardinality": "multi", "required": False},
"cmis:description": {"property_type": "string", "cardinality": "single", "required": False},
@@ -1858,6 +1903,56 @@ def _property_definitions(base_type_id: CMISBaseType) -> dict[str, dict[str, Any
if base_type_id == CMISBaseType.RELATIONSHIP:
definitions["cmis:sourceId"] = {"property_type": "id", "cardinality": "single", "required": True}
definitions["cmis:targetId"] = {"property_type": "id", "cardinality": "single", "required": True}
definitions["kontextual:relationshipId"] = {
"property_type": "id",
"cardinality": "single",
"required": False,
}
definitions["kontextual:predicate"] = {
"property_type": "string",
"cardinality": "single",
"required": False,
}
definitions["kontextual:confidence"] = {
"property_type": "decimal",
"cardinality": "single",
"required": False,
}
definitions["kontextual:targetKind"] = {
"property_type": "string",
"cardinality": "single",
"required": False,
}
definitions["kontextual:direction"] = {
"property_type": "string",
"cardinality": "single",
"required": False,
}
definitions["kontextual:validFrom"] = {
"property_type": "datetime",
"cardinality": "single",
"required": False,
}
definitions["kontextual:validTo"] = {
"property_type": "datetime",
"cardinality": "single",
"required": False,
}
definitions["kontextual:actorId"] = {
"property_type": "string",
"cardinality": "single",
"required": False,
}
definitions["kontextual:createdAt"] = {
"property_type": "datetime",
"cardinality": "single",
"required": False,
}
definitions["kontextual:provenance"] = {
"property_type": "string",
"cardinality": "single",
"required": False,
}
return definitions