"""Canonical knowledge asset and representation models.""" from __future__ import annotations from dataclasses import dataclass, field, replace from enum import Enum from typing import Any from .metadata import Classification, LifecycleState from .primitives import compact_dict, content_digest, new_id, utc_now from .provenance import SourceReference class RepresentationKind(str, Enum): SOURCE = "source" NORMALIZED = "normalized" DERIVED = "derived" @dataclass(frozen=True) class AssetRepresentation: asset_id: str kind: RepresentationKind media_type: str digest: str size_bytes: int storage_ref: str | None = None producer: str | None = None source_ref_id: str | None = None metadata: dict[str, Any] = field(default_factory=dict) representation_id: str = field(default_factory=lambda: new_id("repr")) created_at: str = field(default_factory=lambda: utc_now().isoformat()) @classmethod def from_content( cls, asset_id: str, kind: RepresentationKind | str, media_type: str, content: str | bytes, *, storage_ref: str | None = None, producer: str | None = None, source_ref_id: str | None = None, metadata: dict[str, Any] | None = None, representation_id: str | None = None, ) -> "AssetRepresentation": data = content.encode("utf-8") if isinstance(content, str) else content return cls( representation_id=representation_id or new_id("repr"), asset_id=asset_id, kind=RepresentationKind(kind), media_type=media_type, digest=content_digest(data), size_bytes=len(data), storage_ref=storage_ref, producer=producer, source_ref_id=source_ref_id, metadata=dict(metadata or {}), ) def to_dict(self) -> dict[str, Any]: return compact_dict( { "representation_id": self.representation_id, "asset_id": self.asset_id, "kind": self.kind.value, "media_type": self.media_type, "digest": self.digest, "size_bytes": self.size_bytes, "storage_ref": self.storage_ref, "producer": self.producer, "source_ref_id": self.source_ref_id, "metadata": dict(self.metadata), "created_at": self.created_at, } ) @classmethod def from_dict(cls, data: dict[str, Any]) -> "AssetRepresentation": return cls( representation_id=data["representation_id"], asset_id=data["asset_id"], kind=RepresentationKind(data["kind"]), media_type=data["media_type"], digest=data["digest"], size_bytes=int(data["size_bytes"]), storage_ref=data.get("storage_ref"), producer=data.get("producer"), source_ref_id=data.get("source_ref_id"), metadata=dict(data.get("metadata", {})), created_at=data["created_at"], ) @dataclass(frozen=True) class KnowledgeAsset: id: str title: str classification: Classification source_refs: tuple[SourceReference, ...] = () aliases: tuple[str, ...] = () current_version_id: str | None = None lifecycle: LifecycleState = LifecycleState.ACTIVE metadata: dict[str, Any] = field(default_factory=dict) created_at: str = field(default_factory=lambda: utc_now().isoformat()) updated_at: str = field(default_factory=lambda: utc_now().isoformat()) @classmethod def create( cls, title: str, classification: Classification, *, asset_id: str | None = None, source_refs: list[SourceReference] | tuple[SourceReference, ...] | None = None, aliases: list[str] | tuple[str, ...] | None = None, metadata: dict[str, Any] | None = None, ) -> "KnowledgeAsset": return cls( id=asset_id or new_id("asset"), title=title, classification=classification, source_refs=tuple(source_refs or ()), aliases=tuple(aliases or ()), metadata=dict(metadata or {}), lifecycle=classification.lifecycle, ) def with_source_reference(self, source_ref: SourceReference) -> "KnowledgeAsset": return replace(self, source_refs=self.source_refs + (source_ref,), updated_at=utc_now().isoformat()) def with_alias(self, alias: str) -> "KnowledgeAsset": if alias in self.aliases: return self return replace(self, aliases=self.aliases + (alias,), updated_at=utc_now().isoformat()) def with_current_version(self, version_id: str) -> "KnowledgeAsset": return replace(self, current_version_id=version_id, updated_at=utc_now().isoformat()) def transition_lifecycle(self, lifecycle: LifecycleState | str) -> "KnowledgeAsset": lifecycle_state = LifecycleState(lifecycle) classification = replace(self.classification, lifecycle=lifecycle_state) return replace( self, lifecycle=lifecycle_state, classification=classification, updated_at=utc_now().isoformat(), ) def to_dict(self) -> dict[str, Any]: return compact_dict( { "id": self.id, "title": self.title, "classification": self.classification.to_dict(), "source_refs": [source_ref.to_dict() for source_ref in self.source_refs], "aliases": list(self.aliases), "current_version_id": self.current_version_id, "lifecycle": self.lifecycle.value, "metadata": dict(self.metadata), "created_at": self.created_at, "updated_at": self.updated_at, } ) @classmethod def from_dict(cls, data: dict[str, Any]) -> "KnowledgeAsset": return cls( id=data["id"], title=data["title"], classification=Classification.from_dict(data["classification"]), source_refs=tuple(SourceReference.from_dict(item) for item in data.get("source_refs", [])), aliases=tuple(data.get("aliases", [])), current_version_id=data.get("current_version_id"), lifecycle=LifecycleState(data.get("lifecycle", LifecycleState.ACTIVE.value)), metadata=dict(data.get("metadata", {})), created_at=data["created_at"], updated_at=data["updated_at"], )