generated from coulomb/repo-seed
181 lines
6.5 KiB
Python
181 lines
6.5 KiB
Python
"""Canonical knowledge asset and representation models."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field, replace
|
|
from enum import Enum
|
|
from typing import Any
|
|
|
|
from .metadata import Classification, LifecycleState
|
|
from .primitives import compact_dict, content_digest, new_id, utc_now
|
|
from .provenance import SourceReference
|
|
|
|
|
|
class RepresentationKind(str, Enum):
    """Closed set of categories a stored asset representation can carry.

    The enum mixes in ``str``, so every member compares equal to its raw
    string value and can be looked up from it, e.g.
    ``RepresentationKind("source") is RepresentationKind.SOURCE``.
    """

    # Member values are the strings used when a kind is serialized.
    SOURCE = "source"
    NORMALIZED = "normalized"
    DERIVED = "derived"
|
|
|
|
|
|
@dataclass(frozen=True)
class AssetRepresentation:
    """Immutable record describing one stored representation of an asset.

    A representation ties an ``asset_id`` to a content ``digest`` and byte
    size, together with its kind, media type and optional bookkeeping
    fields. ``representation_id`` and ``created_at`` are generated on
    construction when not supplied.
    """

    asset_id: str
    kind: RepresentationKind
    media_type: str
    digest: str
    size_bytes: int
    storage_ref: str | None = None
    producer: str | None = None
    source_ref_id: str | None = None
    # ``hash=False``: a dict is unhashable, so including it in the generated
    # ``__hash__`` made ``hash(instance)`` raise TypeError. The field still
    # takes part in ``==`` comparisons.
    metadata: dict[str, Any] = field(default_factory=dict, hash=False)
    representation_id: str = field(default_factory=lambda: new_id("repr"))
    created_at: str = field(default_factory=lambda: utc_now().isoformat())

    @classmethod
    def from_content(
        cls,
        asset_id: str,
        kind: RepresentationKind | str,
        media_type: str,
        content: str | bytes,
        *,
        storage_ref: str | None = None,
        producer: str | None = None,
        source_ref_id: str | None = None,
        metadata: dict[str, Any] | None = None,
        representation_id: str | None = None,
    ) -> "AssetRepresentation":
        """Build a representation whose digest and size are derived from ``content``.

        Args:
            asset_id: Identifier of the asset this representation belongs to.
            kind: A ``RepresentationKind`` member or its string value.
            media_type: Media type string stored verbatim.
            content: Payload to describe. Text is encoded as UTF-8 first;
                bytes are used as given.
            storage_ref: Optional storage reference stored verbatim.
            producer: Optional producer name stored verbatim.
            source_ref_id: Optional source reference id stored verbatim.
            metadata: Optional mapping; it is shallow-copied so the caller's
                dict is not shared with the new instance.
            representation_id: Explicit id; a new one is generated when this
                is ``None`` or empty.

        Returns:
            A new ``AssetRepresentation``.

        Raises:
            ValueError: If ``kind`` is not a valid ``RepresentationKind`` value.
        """
        data = content.encode("utf-8") if isinstance(content, str) else content
        return cls(
            representation_id=representation_id or new_id("repr"),
            asset_id=asset_id,
            kind=RepresentationKind(kind),
            media_type=media_type,
            digest=content_digest(data),
            # Size is measured on the encoded bytes, not on the text length.
            size_bytes=len(data),
            storage_ref=storage_ref,
            producer=producer,
            source_ref_id=source_ref_id,
            metadata=dict(metadata or {}),
        )

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form of this representation.

        ``kind`` is emitted as its enum value and ``metadata`` as a shallow
        copy. The mapping is passed through ``compact_dict`` (imported from
        ``.primitives``) before being returned.
        NOTE(review): ``compact_dict`` presumably drops unset entries, which
        is why ``from_dict`` treats several keys as optional - confirm.
        """
        return compact_dict(
            {
                "representation_id": self.representation_id,
                "asset_id": self.asset_id,
                "kind": self.kind.value,
                "media_type": self.media_type,
                "digest": self.digest,
                "size_bytes": self.size_bytes,
                "storage_ref": self.storage_ref,
                "producer": self.producer,
                "source_ref_id": self.source_ref_id,
                "metadata": dict(self.metadata),
                "created_at": self.created_at,
            }
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "AssetRepresentation":
        """Rebuild a representation from the mapping produced by ``to_dict``.

        ``storage_ref``, ``producer``, ``source_ref_id`` and ``metadata`` are
        optional; a missing or ``None`` ``metadata`` becomes an empty dict.

        Raises:
            KeyError: If a required key (``representation_id``, ``asset_id``,
                ``kind``, ``media_type``, ``digest``, ``size_bytes``,
                ``created_at``) is absent.
            ValueError: If ``kind`` is not a valid ``RepresentationKind`` value.
        """
        return cls(
            representation_id=data["representation_id"],
            asset_id=data["asset_id"],
            kind=RepresentationKind(data["kind"]),
            media_type=data["media_type"],
            digest=data["digest"],
            size_bytes=int(data["size_bytes"]),
            storage_ref=data.get("storage_ref"),
            producer=data.get("producer"),
            source_ref_id=data.get("source_ref_id"),
            # ``or {}`` also covers an explicit ``"metadata": None``, matching
            # how ``from_content`` treats a ``None`` metadata argument.
            metadata=dict(data.get("metadata") or {}),
            created_at=data["created_at"],
        )
|
|
|
|
|
|
@dataclass(frozen=True)
class KnowledgeAsset:
    """Immutable record for a knowledge asset and its descriptive state.

    All ``with_*`` / ``transition_*`` methods return a new instance (via
    ``dataclasses.replace``) with ``updated_at`` refreshed; the receiver is
    never modified.
    """

    id: str
    title: str
    classification: Classification
    source_refs: tuple[SourceReference, ...] = ()
    aliases: tuple[str, ...] = ()
    current_version_id: str | None = None
    lifecycle: LifecycleState = LifecycleState.ACTIVE
    # ``hash=False``: a dict is unhashable, so including it in the generated
    # ``__hash__`` would always raise TypeError. It still takes part in ``==``.
    # NOTE(review): hashing an asset additionally requires ``Classification``
    # and ``SourceReference`` to be hashable - confirm in their modules.
    metadata: dict[str, Any] = field(default_factory=dict, hash=False)
    created_at: str = field(default_factory=lambda: utc_now().isoformat())
    updated_at: str = field(default_factory=lambda: utc_now().isoformat())

    @classmethod
    def create(
        cls,
        title: str,
        classification: Classification,
        *,
        asset_id: str | None = None,
        source_refs: list[SourceReference] | tuple[SourceReference, ...] | None = None,
        aliases: list[str] | tuple[str, ...] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> "KnowledgeAsset":
        """Create a new asset whose lifecycle mirrors ``classification.lifecycle``.

        Args:
            title: Asset title.
            classification: Classification to attach; its ``lifecycle`` is
                copied onto the asset.
            asset_id: Explicit id; a new one is generated when ``None`` or empty.
            source_refs: Optional source references, stored as a tuple.
            aliases: Optional aliases, stored as a tuple.
            metadata: Optional mapping; shallow-copied.

        Returns:
            A new ``KnowledgeAsset`` with identical ``created_at`` and
            ``updated_at`` timestamps.
        """
        # Take one timestamp so a freshly created asset does not report an
        # ``updated_at`` that differs from ``created_at`` by a few microseconds.
        timestamp = utc_now().isoformat()
        return cls(
            id=asset_id or new_id("asset"),
            title=title,
            classification=classification,
            source_refs=tuple(source_refs or ()),
            aliases=tuple(aliases or ()),
            metadata=dict(metadata or {}),
            lifecycle=classification.lifecycle,
            created_at=timestamp,
            updated_at=timestamp,
        )

    def _touched(self, **changes: Any) -> "KnowledgeAsset":
        """Return a copy with ``changes`` applied and ``updated_at`` set to now."""
        return replace(self, updated_at=utc_now().isoformat(), **changes)

    def with_source_reference(self, source_ref: SourceReference) -> "KnowledgeAsset":
        """Return a copy with ``source_ref`` appended to ``source_refs``."""
        return self._touched(source_refs=self.source_refs + (source_ref,))

    def with_alias(self, alias: str) -> "KnowledgeAsset":
        """Return a copy with ``alias`` appended, or ``self`` if already present."""
        if alias in self.aliases:
            # Nothing changes, so the same instance (and timestamp) is returned.
            return self
        return self._touched(aliases=self.aliases + (alias,))

    def with_current_version(self, version_id: str) -> "KnowledgeAsset":
        """Return a copy whose ``current_version_id`` is ``version_id``."""
        return self._touched(current_version_id=version_id)

    def transition_lifecycle(self, lifecycle: LifecycleState | str) -> "KnowledgeAsset":
        """Return a copy moved to ``lifecycle``.

        Both the asset's own ``lifecycle`` and the ``lifecycle`` of its
        ``classification`` are updated so the two stay in agreement.

        Raises:
            ValueError: If ``lifecycle`` is not a valid ``LifecycleState`` value.
        """
        lifecycle_state = LifecycleState(lifecycle)
        classification = replace(self.classification, lifecycle=lifecycle_state)
        return self._touched(lifecycle=lifecycle_state, classification=classification)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form of this asset.

        Nested ``classification`` and ``source_refs`` are serialized through
        their own ``to_dict`` methods, ``lifecycle`` as its enum value, and
        ``aliases`` / ``metadata`` as fresh list / dict copies. The mapping is
        passed through ``compact_dict`` (imported from ``.primitives``).
        NOTE(review): ``compact_dict`` presumably drops unset entries, which
        is why ``from_dict`` treats several keys as optional - confirm.
        """
        return compact_dict(
            {
                "id": self.id,
                "title": self.title,
                "classification": self.classification.to_dict(),
                "source_refs": [source_ref.to_dict() for source_ref in self.source_refs],
                "aliases": list(self.aliases),
                "current_version_id": self.current_version_id,
                "lifecycle": self.lifecycle.value,
                "metadata": dict(self.metadata),
                "created_at": self.created_at,
                "updated_at": self.updated_at,
            }
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "KnowledgeAsset":
        """Rebuild an asset from the mapping produced by ``to_dict``.

        ``source_refs``, ``aliases``, ``current_version_id``, ``lifecycle``
        and ``metadata`` are optional; missing or ``None`` collections become
        empty. When ``lifecycle`` is absent it is taken from the decoded
        classification, the same source ``create`` uses.

        Raises:
            KeyError: If ``id``, ``title``, ``classification``, ``created_at``
                or ``updated_at`` is absent.
            ValueError: If ``lifecycle`` is not a valid ``LifecycleState`` value.
        """
        classification = Classification.from_dict(data["classification"])
        raw_lifecycle = data.get("lifecycle")
        # Fall back to the classification's lifecycle rather than a fixed
        # ACTIVE, so the two lifecycle fields cannot silently disagree.
        lifecycle = LifecycleState(
            raw_lifecycle if raw_lifecycle is not None else classification.lifecycle
        )
        return cls(
            id=data["id"],
            title=data["title"],
            classification=classification,
            # ``or ()`` / ``or {}`` also cover keys explicitly set to None.
            source_refs=tuple(
                SourceReference.from_dict(item) for item in data.get("source_refs") or ()
            ),
            aliases=tuple(data.get("aliases") or ()),
            current_version_id=data.get("current_version_id"),
            lifecycle=lifecycle,
            metadata=dict(data.get("metadata") or {}),
            created_at=data["created_at"],
            updated_at=data["updated_at"],
        )
|