# Source file: kontextual-engine/src/kontextual_engine/core/assets.py
# (Repository file-view header as captured: 181 lines, 6.5 KiB, Python.)

"""Canonical knowledge asset and representation models."""
from __future__ import annotations
from dataclasses import dataclass, field, replace
from enum import Enum
from typing import Any
from .metadata import Classification, LifecycleState
from .primitives import compact_dict, content_digest, new_id, utc_now
from .provenance import SourceReference
class RepresentationKind(str, Enum):
    """Closed set of kinds an asset representation can have.

    The ``str`` mixin makes every member compare equal to its plain string
    value, and ``RepresentationKind("source")`` resolves a raw string back to
    the matching member (raising ``ValueError`` for unknown values).
    """

    # NOTE(review): the member meanings below are inferred from their names
    # only; confirm against the ingestion pipeline before relying on them.
    SOURCE = "source"  # presumably the content as originally received
    NORMALIZED = "normalized"  # presumably a cleaned/canonical form
    DERIVED = "derived"  # presumably an artifact computed from another form
@dataclass(frozen=True)
class AssetRepresentation:
    """Immutable description of one representation of an asset's content.

    The record stores identifying and integrity data (media type, digest,
    byte size) plus optional references; it does not hold the content itself.
    The dataclass is frozen, but ``metadata`` is an ordinary ``dict`` and is
    therefore not protected against in-place mutation.
    """

    # Identifier of the asset this representation is attached to
    # (presumably a ``KnowledgeAsset.id``; not validated here).
    asset_id: str
    kind: RepresentationKind
    media_type: str
    # Content digest; ``from_content`` fills it with ``content_digest(data)``.
    digest: str
    # Content length in bytes; ``from_content`` measures text after UTF-8 encoding.
    size_bytes: int
    # The three optional references below are stored and serialized verbatim;
    # this class never interprets them.
    storage_ref: str | None = None
    producer: str | None = None
    source_ref_id: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
    representation_id: str = field(default_factory=lambda: new_id("repr"))
    # String produced by ``utc_now().isoformat()`` at construction time.
    created_at: str = field(default_factory=lambda: utc_now().isoformat())

    @classmethod
    def from_content(
        cls,
        asset_id: str,
        kind: RepresentationKind | str,
        media_type: str,
        content: str | bytes,
        *,
        storage_ref: str | None = None,
        producer: str | None = None,
        source_ref_id: str | None = None,
        metadata: dict[str, Any] | None = None,
        representation_id: str | None = None,
    ) -> "AssetRepresentation":
        """Build a representation by measuring and digesting ``content``.

        Args:
            asset_id: Identifier of the asset the representation belongs to.
            kind: A ``RepresentationKind`` member or its string value.
            media_type: Media type label stored as given.
            content: Raw content; ``str`` input is encoded as UTF-8 first.
            storage_ref: Optional storage reference, stored as given.
            producer: Optional producer label, stored as given.
            source_ref_id: Optional source reference id, stored as given.
            metadata: Optional mapping; copied so the caller's dict is not shared.
            representation_id: Explicit id; a falsy value requests a fresh
                ``new_id("repr")``.

        Returns:
            A new ``AssetRepresentation`` whose ``digest`` and ``size_bytes``
            are derived from the encoded content. ``created_at`` takes its
            field default.

        Raises:
            ValueError: If ``kind`` is not a valid ``RepresentationKind`` value.
        """
        data = content.encode("utf-8") if isinstance(content, str) else content
        return cls(
            representation_id=representation_id or new_id("repr"),
            asset_id=asset_id,
            kind=RepresentationKind(kind),
            media_type=media_type,
            digest=content_digest(data),
            size_bytes=len(data),
            storage_ref=storage_ref,
            producer=producer,
            source_ref_id=source_ref_id,
            metadata=dict(metadata or {}),
        )

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict, filtered through ``compact_dict``.

        ``kind`` is emitted as its string value and ``metadata`` as a shallow
        copy. Which entries ``compact_dict`` prunes is defined in
        ``.primitives`` and is not visible from this module.
        """
        return compact_dict(
            {
                "representation_id": self.representation_id,
                "asset_id": self.asset_id,
                "kind": self.kind.value,
                "media_type": self.media_type,
                "digest": self.digest,
                "size_bytes": self.size_bytes,
                "storage_ref": self.storage_ref,
                "producer": self.producer,
                "source_ref_id": self.source_ref_id,
                "metadata": dict(self.metadata),
                "created_at": self.created_at,
            }
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "AssetRepresentation":
        """Rebuild a representation from a dict shaped like ``to_dict`` output.

        ``representation_id``, ``asset_id``, ``kind``, ``media_type``,
        ``digest``, ``size_bytes`` and ``created_at`` are required; the
        remaining keys are optional.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``kind`` is not a valid ``RepresentationKind`` value
                or ``size_bytes`` cannot be converted to ``int``.
        """
        return cls(
            representation_id=data["representation_id"],
            asset_id=data["asset_id"],
            kind=RepresentationKind(data["kind"]),
            media_type=data["media_type"],
            digest=data["digest"],
            size_bytes=int(data["size_bytes"]),
            storage_ref=data.get("storage_ref"),
            producer=data.get("producer"),
            source_ref_id=data.get("source_ref_id"),
            # ``or {}`` also covers an explicit null, which ``dict(None)``
            # would reject; mirrors the handling in ``from_content``.
            metadata=dict(data.get("metadata") or {}),
            created_at=data["created_at"],
        )
@dataclass(frozen=True)
class KnowledgeAsset:
    """Immutable record for a knowledge asset and its identifying data.

    Instances are frozen; every ``with_*`` / ``transition_*`` method returns a
    new instance with a refreshed ``updated_at`` instead of mutating in place.
    ``metadata`` is an ordinary ``dict`` and is not itself protected against
    in-place mutation.
    """

    id: str
    title: str
    classification: Classification
    source_refs: tuple[SourceReference, ...] = ()
    aliases: tuple[str, ...] = ()
    current_version_id: str | None = None
    # ``create`` and ``transition_lifecycle`` keep this equal to
    # ``classification.lifecycle``; direct construction does not enforce it.
    lifecycle: LifecycleState = LifecycleState.ACTIVE
    metadata: dict[str, Any] = field(default_factory=dict)
    # Both timestamps are strings produced by ``utc_now().isoformat()``.
    created_at: str = field(default_factory=lambda: utc_now().isoformat())
    updated_at: str = field(default_factory=lambda: utc_now().isoformat())

    @classmethod
    def create(
        cls,
        title: str,
        classification: Classification,
        *,
        asset_id: str | None = None,
        source_refs: list[SourceReference] | tuple[SourceReference, ...] | None = None,
        aliases: list[str] | tuple[str, ...] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> "KnowledgeAsset":
        """Create a new asset whose lifecycle follows its classification.

        Args:
            title: Title stored as given.
            classification: Classification; its ``lifecycle`` becomes the
                asset's ``lifecycle``.
            asset_id: Explicit id; a falsy value requests ``new_id("asset")``.
            source_refs: Optional source references, stored as a tuple.
            aliases: Optional aliases, stored as a tuple.
            metadata: Optional mapping; copied so the caller's dict is not shared.

        Returns:
            A new ``KnowledgeAsset`` with identical ``created_at`` and
            ``updated_at`` values.
        """
        # Stamp once: the two field default factories would each call
        # ``utc_now()`` and leave a brand-new asset with differing timestamps.
        timestamp = utc_now().isoformat()
        return cls(
            id=asset_id or new_id("asset"),
            title=title,
            classification=classification,
            source_refs=tuple(source_refs or ()),
            aliases=tuple(aliases or ()),
            metadata=dict(metadata or {}),
            lifecycle=classification.lifecycle,
            created_at=timestamp,
            updated_at=timestamp,
        )

    def with_source_reference(self, source_ref: SourceReference) -> "KnowledgeAsset":
        """Return a copy with ``source_ref`` appended (duplicates are not filtered)."""
        return replace(
            self,
            source_refs=self.source_refs + (source_ref,),
            updated_at=utc_now().isoformat(),
        )

    def with_alias(self, alias: str) -> "KnowledgeAsset":
        """Return a copy with ``alias`` appended, or ``self`` if already present."""
        if alias in self.aliases:
            # Nothing changes, so the same instance (and timestamp) is kept.
            return self
        return replace(
            self,
            aliases=self.aliases + (alias,),
            updated_at=utc_now().isoformat(),
        )

    def with_current_version(self, version_id: str) -> "KnowledgeAsset":
        """Return a copy whose ``current_version_id`` is ``version_id``."""
        return replace(
            self,
            current_version_id=version_id,
            updated_at=utc_now().isoformat(),
        )

    def transition_lifecycle(self, lifecycle: LifecycleState | str) -> "KnowledgeAsset":
        """Return a copy moved to ``lifecycle``.

        The new state is written to both the asset and its classification so
        the two stay in agreement.

        Raises:
            ValueError: If ``lifecycle`` is not a valid ``LifecycleState`` value.
        """
        lifecycle_state = LifecycleState(lifecycle)
        classification = replace(self.classification, lifecycle=lifecycle_state)
        return replace(
            self,
            lifecycle=lifecycle_state,
            classification=classification,
            updated_at=utc_now().isoformat(),
        )

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict, filtered through ``compact_dict``.

        Nested objects are serialized through their own ``to_dict``; tuples
        become lists and ``lifecycle`` is emitted as its value. Which entries
        ``compact_dict`` prunes is defined in ``.primitives`` and is not
        visible from this module.
        """
        return compact_dict(
            {
                "id": self.id,
                "title": self.title,
                "classification": self.classification.to_dict(),
                "source_refs": [source_ref.to_dict() for source_ref in self.source_refs],
                "aliases": list(self.aliases),
                "current_version_id": self.current_version_id,
                "lifecycle": self.lifecycle.value,
                "metadata": dict(self.metadata),
                "created_at": self.created_at,
                "updated_at": self.updated_at,
            }
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "KnowledgeAsset":
        """Rebuild an asset from a dict shaped like ``to_dict`` output.

        ``id``, ``title``, ``classification``, ``created_at`` and
        ``updated_at`` are required. Absent or null ``source_refs``,
        ``aliases`` and ``metadata`` are treated as empty. An absent or null
        ``lifecycle`` falls back to the classification's lifecycle.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If the lifecycle value is not a valid ``LifecycleState``.
        """
        classification = Classification.from_dict(data["classification"])
        raw_lifecycle = data.get("lifecycle")
        if raw_lifecycle is None:
            # Follow the classification rather than assuming ACTIVE, matching
            # the agreement that ``create`` and ``transition_lifecycle`` keep.
            raw_lifecycle = classification.lifecycle
        return cls(
            id=data["id"],
            title=data["title"],
            classification=classification,
            # ``or ()`` / ``or {}`` also cover explicit nulls, which
            # ``tuple(None)`` / ``dict(None)`` would reject.
            source_refs=tuple(
                SourceReference.from_dict(item) for item in data.get("source_refs") or ()
            ),
            aliases=tuple(data.get("aliases") or ()),
            current_version_id=data.get("current_version_id"),
            lifecycle=LifecycleState(raw_lifecycle),
            metadata=dict(data.get("metadata") or {}),
            created_at=data["created_at"],
            updated_at=data["updated_at"],
        )