Files
kontextual-engine/src/kontextual_engine/artifacts.py

296 lines
9.1 KiB
Python

"""Core artifact, collection, and relationship models."""
from __future__ import annotations
import hashlib
import json
import uuid
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Any
def utc_now() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` in UTC."""
    return datetime.now(tz=timezone.utc)
def content_digest(content: str | bytes) -> str:
    """Hash text or raw bytes with SHA-256 and return ``"sha256:<hex>"``.

    A ``str`` is encoded as UTF-8 before hashing; any other value is handed
    to the hash function unchanged.
    """
    raw = content.encode("utf-8") if isinstance(content, str) else content
    hex_part = hashlib.sha256(raw).hexdigest()
    return "sha256:" + hex_part
def stable_json_dumps(value: Any) -> str:
    """Serialize *value* to compact JSON with object keys in sorted order.

    Values the JSON encoder cannot handle natively are converted with ``str``.
    """
    return json.dumps(
        value,
        default=str,
        separators=(",", ":"),
        sort_keys=True,
    )
def bundle_digest(value: Any) -> str:
    """Return the content digest of *value*'s key-sorted JSON serialization."""
    canonical = stable_json_dumps(value)
    return content_digest(canonical)
class ArtifactType(str, Enum):
    """Closed set of artifact kinds.

    Members are also ``str`` instances equal to their value, so they compare
    equal to the plain strings used in serialized form.
    """

    CONTENT = "content"
    TEMPLATE = "template"
    GENERATED = "generated"
    DATASET = "dataset"
    DOCUMENT = "document"
    CONFIG = "config"
    SCHEMA = "schema"
    RUN_OUTPUT = "run_output"
class RelationshipType(str, Enum):
    """Closed set of relationship kinds between two artifacts.

    Members are also ``str`` instances equal to their value, so they compare
    equal to the plain strings used in serialized form.
    """

    RELATES_TO = "relates_to"
    DEPENDS_ON = "depends_on"
    PRODUCES = "produces"
    DERIVED_FROM = "derived_from"
    EVALUATES = "evaluates"
    CONTAINS = "contains"
    CUSTOM = "custom"
@dataclass
class ArtifactMetadata:
    """Optional descriptive metadata carried by an artifact.

    Every field has a default, so ``ArtifactMetadata()`` is a valid empty
    record.
    """

    description: str | None = None
    tags: list[str] = field(default_factory=list)
    media_type: str | None = None
    source_uri: str | None = None
    version: str | None = None
    custom: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return all fields as a plain dict.

        ``dataclasses.asdict`` copies the ``tags`` list and ``custom`` dict,
        so the result does not alias this instance's containers.
        """
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "ArtifactMetadata":
        """Build metadata from a mapping, tolerating missing or null entries.

        Args:
            data: Mapping with any subset of the field names as keys, or
                ``None`` / empty for an all-default instance.

        Returns:
            A new instance whose ``tags`` and ``custom`` are fresh copies.
        """
        data = data or {}

        # An explicit null (e.g. JSON ``"tags": null``) is treated the same
        # as a missing key instead of raising TypeError from list(None).
        raw_tags = data.get("tags") or []
        if isinstance(raw_tags, str):
            # list("draft") would explode the tag into single characters.
            raw_tags = [raw_tags]

        raw_custom = data.get("custom") or {}

        return cls(
            description=data.get("description"),
            tags=list(raw_tags),
            media_type=data.get("media_type"),
            source_uri=data.get("source_uri"),
            version=data.get("version"),
            custom=dict(raw_custom),
        )
@dataclass
class Collection:
    """A named collection record with optional domain, parent id and metadata.

    Timestamps default to the current UTC time when not supplied.
    """

    id: str
    name: str
    domain: str | None = None
    parent_id: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
    created_at: datetime = field(default_factory=utc_now)
    updated_at: datetime = field(default_factory=utc_now)

    @classmethod
    def create(
        cls,
        name: str,
        *,
        domain: str | None = None,
        parent_id: str | None = None,
        collection_id: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> "Collection":
        """Create a new collection, generating a UUID4 id when none is given.

        Args:
            name: Collection name.
            domain: Optional domain label.
            parent_id: Optional id of a parent (presumably another collection).
            collection_id: Explicit id; a falsy value triggers generation.
            metadata: Optional metadata mapping; a falsy value becomes ``{}``.

        Returns:
            A collection whose ``created_at`` and ``updated_at`` are identical.
        """
        # Take one timestamp for both fields. Letting each default factory
        # call utc_now() separately makes updated_at drift slightly past
        # created_at, so a brand-new collection looks already modified.
        now = utc_now()
        return cls(
            id=collection_id or str(uuid.uuid4()),
            name=name,
            domain=domain,
            parent_id=parent_id,
            metadata=metadata or {},
            created_at=now,
            updated_at=now,
        )

    def to_dict(self) -> dict[str, Any]:
        """Return a dict with ISO-8601 timestamps and a shallow metadata copy."""
        return {
            "id": self.id,
            "name": self.name,
            "domain": self.domain,
            "parent_id": self.parent_id,
            "metadata": dict(self.metadata),
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Collection":
        """Rebuild a collection from the mapping produced by ``to_dict``.

        Raises:
            KeyError: If ``id``, ``name``, ``created_at`` or ``updated_at``
                is missing.
            ValueError: If a timestamp is not parseable by
                ``datetime.fromisoformat``.
        """
        return cls(
            id=data["id"],
            name=data["name"],
            domain=data.get("domain"),
            parent_id=data.get("parent_id"),
            # ``or {}`` also covers an explicit null, which dict(None) rejects.
            metadata=dict(data.get("metadata") or {}),
            created_at=datetime.fromisoformat(data["created_at"]),
            updated_at=datetime.fromisoformat(data["updated_at"]),
        )
@dataclass
class Artifact:
    """A named piece of text content stored with its digest and byte size.

    ``content_digest`` and ``content_size`` are derived from ``content`` by
    ``create`` and ``update_content``. The plain constructor and
    ``from_dict`` store whatever values they are given.
    """

    id: str
    collection_id: str
    name: str
    artifact_type: ArtifactType
    content_digest: str
    content_size: int
    content: str = ""
    metadata: ArtifactMetadata = field(default_factory=ArtifactMetadata)
    created_at: datetime = field(default_factory=utc_now)
    updated_at: datetime = field(default_factory=utc_now)

    @classmethod
    def create(
        cls,
        collection_id: str,
        name: str,
        content: str,
        *,
        artifact_type: ArtifactType = ArtifactType.CONTENT,
        metadata: ArtifactMetadata | None = None,
        artifact_id: str | None = None,
    ) -> "Artifact":
        """Create an artifact, computing digest and size from *content*.

        Args:
            collection_id: Id of the owning collection.
            name: Artifact name.
            content: Text content; hashed and measured as UTF-8.
            artifact_type: Kind of artifact.
            metadata: Optional metadata; a default instance is used if omitted.
            artifact_id: Explicit id; a falsy value triggers UUID4 generation.

        Returns:
            An artifact whose ``created_at`` and ``updated_at`` are identical.
        """
        # Encode once and reuse the bytes for both the digest and the size.
        encoded = content.encode("utf-8")
        # One timestamp for both fields, so a new artifact never appears to
        # have been updated after it was created.
        now = utc_now()
        return cls(
            id=artifact_id or str(uuid.uuid4()),
            collection_id=collection_id,
            name=name,
            artifact_type=artifact_type,
            content_digest=content_digest(encoded),
            content_size=len(encoded),
            content=content,
            metadata=metadata or ArtifactMetadata(),
            created_at=now,
            updated_at=now,
        )

    def update_content(self, content: str) -> None:
        """Replace the content and refresh digest, size and ``updated_at``."""
        # Encode once and reuse the bytes for both the digest and the size.
        encoded = content.encode("utf-8")
        self.content = content
        self.content_digest = content_digest(encoded)
        self.content_size = len(encoded)
        self.updated_at = utc_now()

    def has_changed(self, digest: str) -> bool:
        """Return True when *digest* differs from the stored content digest."""
        return self.content_digest != digest

    def to_dict(self) -> dict[str, Any]:
        """Return a dict with the enum value, nested metadata and ISO timestamps."""
        return {
            "id": self.id,
            "collection_id": self.collection_id,
            "name": self.name,
            "artifact_type": self.artifact_type.value,
            "content_digest": self.content_digest,
            "content_size": self.content_size,
            "content": self.content,
            "metadata": self.metadata.to_dict(),
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Artifact":
        """Rebuild an artifact from the mapping produced by ``to_dict``.

        The stored digest and size are used as given and are not recomputed
        from ``content``, which may be absent and then defaults to ``""``.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``artifact_type`` is not a known value or a
                timestamp is not parseable by ``datetime.fromisoformat``.
        """
        return cls(
            id=data["id"],
            collection_id=data["collection_id"],
            name=data["name"],
            artifact_type=ArtifactType(data["artifact_type"]),
            content_digest=data["content_digest"],
            content_size=int(data["content_size"]),
            content=data.get("content", ""),
            metadata=ArtifactMetadata.from_dict(data.get("metadata")),
            created_at=datetime.fromisoformat(data["created_at"]),
            updated_at=datetime.fromisoformat(data["updated_at"]),
        )
@dataclass(frozen=True)
class ArtifactReference:
    """Immutable reference written as ``[collection_id:]name[@version]``."""

    name: str
    collection_id: str | None = None
    version: str | None = None

    @classmethod
    def parse(cls, value: str) -> "ArtifactReference":
        """Parse a ``[collection_id:]name[@version]`` string.

        The version is whatever follows the last ``@``; the collection id is
        whatever precedes the first ``:`` of the remainder. An empty
        collection id or empty version is normalised to ``None``, so
        ``"name@"`` and ``"name"`` parse to equal references.

        Raises:
            ValueError: If *value* is empty or has no name part.
        """
        if not value:
            raise ValueError("Artifact reference cannot be empty")

        # rpartition returns ("", "", value) when there is no "@".
        head, at_sign, tail = value.rpartition("@")
        ref, version = (head, tail) if at_sign else (value, "")

        # partition returns (ref, "", "") when there is no ":".
        prefix, colon, rest = ref.partition(":")
        collection_id, name = (prefix, rest) if colon else ("", ref)

        if not name:
            raise ValueError(f"Invalid artifact reference: {value}")
        return cls(
            name=name,
            collection_id=collection_id or None,
            # Without this, "name@" would store version="" and compare
            # unequal to "name" although both render as "name".
            version=version or None,
        )

    def __str__(self) -> str:
        """Render the reference back to ``[collection_id:]name[@version]``."""
        ref = f"{self.collection_id}:{self.name}" if self.collection_id else self.name
        return f"{ref}@{self.version}" if self.version else ref
@dataclass
class Relationship:
    """A directed, labelled link from a source artifact to a target artifact."""

    id: str
    source_artifact_id: str
    target_artifact_id: str
    predicate: str
    relationship_type: RelationshipType = RelationshipType.RELATES_TO
    evidence: str = ""
    provenance: dict[str, Any] = field(default_factory=dict)
    created_at: datetime = field(default_factory=utc_now)

    @classmethod
    def create(
        cls,
        source_artifact_id: str,
        target_artifact_id: str,
        predicate: str,
        *,
        relationship_type: RelationshipType = RelationshipType.RELATES_TO,
        evidence: str = "",
        provenance: dict[str, Any] | None = None,
        relationship_id: str | None = None,
    ) -> "Relationship":
        """Build a relationship, generating a UUID4 id when none is supplied.

        A falsy ``relationship_id`` triggers id generation and a falsy
        ``provenance`` is replaced by a new empty dict.
        """
        new_id = relationship_id if relationship_id else str(uuid.uuid4())
        origin = provenance if provenance else {}
        return cls(
            new_id,
            source_artifact_id,
            target_artifact_id,
            predicate,
            relationship_type=relationship_type,
            evidence=evidence,
            provenance=origin,
        )

    def edge(self) -> tuple[str, str, str]:
        """Return the ``(source id, target id, predicate)`` triple."""
        triple = (self.source_artifact_id, self.target_artifact_id, self.predicate)
        return triple

    def to_dict(self) -> dict[str, Any]:
        """Return a dict with the enum value, copied provenance and ISO timestamp."""
        payload: dict[str, Any] = {
            "id": self.id,
            "source_artifact_id": self.source_artifact_id,
            "target_artifact_id": self.target_artifact_id,
            "predicate": self.predicate,
        }
        payload["relationship_type"] = self.relationship_type.value
        payload["evidence"] = self.evidence
        payload["provenance"] = dict(self.provenance)
        payload["created_at"] = self.created_at.isoformat()
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Relationship":
        """Rebuild a relationship from the mapping produced by ``to_dict``.

        A missing ``relationship_type`` falls back to ``"relates_to"``;
        missing ``evidence`` and ``provenance`` fall back to empty values.
        """
        kind = RelationshipType(data.get("relationship_type", "relates_to"))
        origin = dict(data.get("provenance", {}))
        stamp = datetime.fromisoformat(data["created_at"])
        return cls(
            id=data["id"],
            source_artifact_id=data["source_artifact_id"],
            target_artifact_id=data["target_artifact_id"],
            predicate=data["predicate"],
            relationship_type=kind,
            evidence=data.get("evidence", ""),
            provenance=origin,
            created_at=stamp,
        )