content-addressed blob storage: blob_storage.py, memory, local, and S3 adapters

This commit is contained in:
2026-05-07 03:51:25 +02:00
parent c2bc7071d7
commit ebace73761
22 changed files with 1489 additions and 47 deletions

View File

@@ -12,7 +12,7 @@ from datetime import datetime
from importlib import metadata
from typing import Any
from kontextual_engine.adapters.memory import InMemoryAssetRegistryRepository
from kontextual_engine.adapters.memory import InMemoryAssetRegistryRepository, InMemoryBlobStorage
from kontextual_engine.core import (
Actor,
ActorType,
@@ -52,7 +52,7 @@ from kontextual_engine.core import (
utc_now,
)
from kontextual_engine.errors import AuthorizationError, KontextualError, NotFoundError, ValidationError
from kontextual_engine.ports import AllowAllPolicyGateway, AssetRegistryRepository, PolicyGateway
from kontextual_engine.ports import AllowAllPolicyGateway, AssetRegistryRepository, BlobStorage, PolicyGateway
from kontextual_engine.services import (
AssetIngestionService,
AssetQueryRequest,
@@ -60,6 +60,7 @@ from kontextual_engine.services import (
AssetRetrievalService,
ContextEntityQueryRequest,
RelationshipQueryRequest,
RepresentationContentService,
RetrievalFeedbackRequest,
TransformationRequest,
TransformationService,
@@ -179,6 +180,7 @@ AGENT_OPERATION_CATALOG: tuple[dict[str, Any], ...] = (
@dataclass
class ServiceRuntime:
repository: AssetRegistryRepository = field(default_factory=InMemoryAssetRegistryRepository)
blob_storage: BlobStorage = field(default_factory=InMemoryBlobStorage)
policy_gateway: PolicyGateway = field(default_factory=AllowAllPolicyGateway)
api_version: str = API_VERSION
service_name: str = "kontextual-engine"
@@ -193,6 +195,14 @@ class ServiceRuntime:
def retrieval_service(self) -> AssetRetrievalService:
    """Build an asset retrieval service bound to this runtime.

    Returns:
        A newly constructed ``AssetRetrievalService`` that receives the
        runtime's repository and its policy gateway.
    """
    registry = self.repository
    return AssetRetrievalService(registry, policy_gateway=self.policy_gateway)
def content_service(self) -> RepresentationContentService:
    """Build the representation content service for this runtime.

    Returns:
        A newly constructed ``RepresentationContentService`` wired with the
        runtime's repository, blob storage, policy gateway, and the object
        returned by ``self.asset_service()``.
    """
    # Collaborators are gathered in the same order the constructor call
    # would evaluate them, then handed over in one go.
    registry = self.repository
    blobs = self.blob_storage
    gateway = self.policy_gateway
    assets = self.asset_service()
    return RepresentationContentService(
        registry,
        blobs,
        policy_gateway=gateway,
        asset_service=assets,
    )
def transformation_service(self) -> TransformationService:
return TransformationService(
self.repository,
@@ -407,6 +417,25 @@ class ServiceRuntime:
)
return content_stream
def cmis_content_stream_bytes(
    self,
    access_point_id: str,
    object_id: str,
    context: OperationContext,
):
    """Stream the content of a CMIS object exposed by an access point.

    Args:
        access_point_id: Identifier used to look up the CMIS mapper.
        object_id: CMIS object identifier; converted to an asset id via
            ``_cmis_asset_id``.
        context: Operation context used for the access decision, the
            exposure check, and the content stream call.

    Returns:
        Whatever ``stream_content`` of the content service returns for the
        resolved asset.

    Raises:
        NotFoundError: When the access point does not expose the asset in
            this context.
        Exception: The error built by ``_cmis_authorization_error`` when the
            ``GET_CONTENT_STREAM`` action is not allowed (presumably an
            authorization error; its concrete type is not visible here).
    """
    cmis_mapper = self._cmis_mapper(access_point_id)

    # Gate 1: the access point must allow the getContentStream action.
    verdict = cmis_mapper.access_point.decide_action(
        CMISAction.GET_CONTENT_STREAM,
        context,
        resource=object_id,
    )
    if not verdict.allowed:
        raise _cmis_authorization_error(verdict, "getContentStream")

    # Gate 2: the asset must be visible through this access point; a hidden
    # asset is reported as "not found" rather than "forbidden".
    resolved_asset_id = _cmis_asset_id(object_id)
    candidate = self.repository.get_asset(resolved_asset_id)
    is_exposed = cmis_mapper.access_point.exposes_asset(candidate, context)
    if not is_exposed:
        raise NotFoundError(
            "CMIS object not found",
            details={"object_id": object_id, "access_point_id": access_point_id},
        )

    streamer = self.content_service()
    return streamer.stream_content(resolved_asset_id, context)
def cmis_acl(
self,
access_point_id: str,
@@ -466,23 +495,23 @@ class ServiceRuntime:
"metadata": dict(payload.get("classification_metadata", {})),
}
)
asset_id = payload.get("asset_id") or new_id("asset")
content = payload.get("content")
representations = []
if content is not None:
representations.append(
AssetRepresentation.from_content(
payload.get("asset_id") or "cmis-new-document",
RepresentationKind.SOURCE,
payload.get("media_type", "text/plain"),
content,
storage_ref=payload.get("storage_ref"),
)
representation, _blob, _created = self.content_service().build_representation_from_bytes(
asset_id,
RepresentationKind.SOURCE,
payload.get("media_type", "text/plain"),
content,
metadata={"cmis": {"operation": "createDocument"}},
)
representations.append(representation)
result = self.asset_service().create_asset(
payload["name"],
classification,
context,
asset_id=payload.get("asset_id"),
asset_id=asset_id,
representations=representations,
metadata_records=[_metadata_record(item) for item in payload.get("metadata_records", [])],
idempotency_key=payload.get("idempotency_key"),
@@ -527,21 +556,29 @@ class ServiceRuntime:
if not decision.allowed:
raise _cmis_authorization_error(decision, "setContentStream")
asset_id = _cmis_asset_id(object_id)
representation = AssetRepresentation.from_content(
self.content_service().add_representation_from_bytes(
asset_id,
payload.get("kind", RepresentationKind.SOURCE.value),
payload.get("media_type", "text/plain"),
payload.get("content", ""),
storage_ref=payload.get("storage_ref"),
)
self.asset_service().add_representation(
asset_id,
representation,
context,
expected_current_version_id=payload.get("expected_current_version_id"),
metadata={"cmis": {"operation": "setContentStream"}},
)
return self.cmis_object(access_point_id, object_id, context)
def representation_content_stream(
    self,
    asset_id: str,
    representation_id: str,
    context: OperationContext,
):
    """Stream the content of one specific representation of an asset.

    Args:
        asset_id: Identifier of the asset that owns the representation.
        representation_id: Identifier of the representation to stream.
        context: Operation context forwarded to the content service.

    Returns:
        Whatever ``stream_content`` of the content service returns.
    """
    streamer = self.content_service()
    return streamer.stream_content(asset_id, context, representation_id=representation_id)
def cmis_delete_object(
self,
access_point_id: str,
@@ -2031,7 +2068,7 @@ class ServiceRuntime:
def create_app(runtime: ServiceRuntime | None = None):
try:
from fastapi import Depends, FastAPI, Header, HTTPException, Query
from fastapi.responses import JSONResponse
from fastapi.responses import JSONResponse, StreamingResponse
except ImportError as exc: # pragma: no cover - exercised when optional extra is absent
raise RuntimeError(
"FastAPI service dependencies are not installed. Install kontextual-engine[service]."
@@ -2202,6 +2239,25 @@ def create_app(runtime: ServiceRuntime | None = None):
) -> dict[str, Any]:
return response(runtime.cmis_content_stream, access_point_id, object_id, context)
@app.get("/cmis/{access_point_id}/browser/content-bytes/{object_id:path}", tags=["cmis"])
def cmis_content_stream_bytes(
    access_point_id: str,
    object_id: str,
    context: OperationContext = Depends(context_from_headers),
) -> StreamingResponse:
    # Streams the raw bytes of a CMIS object's content.
    #
    # Documented with comments rather than a docstring on purpose: FastAPI
    # publishes a route function's docstring as the OpenAPI description, and
    # this edit should not alter the generated schema.
    #
    # NOTE(review): this relies on `response(...)` handing back the runtime
    # result unchanged (an object exposing `.representation` and `.chunks`);
    # confirm against the helper's definition.
    result = response(runtime.cmis_content_stream_bytes, access_point_id, object_id, context)
    representation = result.representation
    return StreamingResponse(
        result.chunks,
        media_type=representation.media_type,
        headers={
            "Content-Length": str(representation.size_bytes),
            # RFC 9110 defines an entity-tag as a double-quoted opaque
            # string; a bare digest is not a valid ETag and may be ignored
            # or mishandled by caches and conditional-request logic.
            "ETag": f'"{representation.digest}"',
            "X-Kontextual-Representation-Id": representation.representation_id,
            # Header values must be strings, so a missing storage ref is
            # sent as an empty value.
            "X-Kontextual-Storage-Ref": representation.storage_ref or "",
        },
    )
@app.get("/cmis/{access_point_id}/browser/acl/{object_id:path}", tags=["cmis"])
def cmis_acl(
access_point_id: str,
@@ -2323,6 +2379,25 @@ def create_app(runtime: ServiceRuntime | None = None):
def get_asset(asset_id: str) -> dict[str, Any]:
return response(runtime.get_asset, asset_id)
@app.get(f"{prefix}/assets/{{asset_id}}/representations/{{representation_id}}/content", tags=["assets"])
def get_representation_content(
    asset_id: str,
    representation_id: str,
    context: OperationContext = Depends(context_from_headers),
) -> StreamingResponse:
    # Streams the raw bytes of a single asset representation.
    #
    # Documented with comments rather than a docstring on purpose: FastAPI
    # publishes a route function's docstring as the OpenAPI description, and
    # this edit should not alter the generated schema.
    #
    # NOTE(review): this relies on `response(...)` handing back the runtime
    # result unchanged (an object exposing `.representation` and `.chunks`);
    # confirm against the helper's definition.
    result = response(runtime.representation_content_stream, asset_id, representation_id, context)
    representation = result.representation
    return StreamingResponse(
        result.chunks,
        media_type=representation.media_type,
        headers={
            "Content-Length": str(representation.size_bytes),
            # RFC 9110 defines an entity-tag as a double-quoted opaque
            # string; a bare digest is not a valid ETag and may be ignored
            # or mishandled by caches and conditional-request logic.
            "ETag": f'"{representation.digest}"',
            "X-Kontextual-Representation-Id": representation.representation_id,
            # Header values must be strings, so a missing storage ref is
            # sent as an empty value.
            "X-Kontextual-Storage-Ref": representation.storage_ref or "",
        },
    )
@app.post(f"{prefix}/assets/{{asset_id}}/metadata", tags=["metadata"])
def add_metadata(
asset_id: str,