generated from coulomb/repo-seed
Implement first knowledge engine runtime slice
This commit is contained in:
295
src/kontextual_engine/artifacts.py
Normal file
295
src/kontextual_engine/artifacts.py
Normal file
@@ -0,0 +1,295 @@
|
||||
"""Core artifact, collection, and relationship models."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import uuid
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
|
||||
def utc_now() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` in UTC."""
    return datetime.now(tz=timezone.utc)
|
||||
|
||||
|
||||
def content_digest(content: str | bytes) -> str:
    """Compute the SHA-256 digest of *content* as ``"sha256:<hex>"``.

    Text is encoded as UTF-8 before hashing; any other value is handed to
    the hash function unchanged.
    """
    payload = content.encode("utf-8") if isinstance(content, str) else content
    hex_digest = hashlib.sha256(payload).hexdigest()
    return f"sha256:{hex_digest}"
|
||||
|
||||
|
||||
def stable_json_dumps(value: Any) -> str:
    """Serialize *value* to compact JSON with object keys in sorted order.

    Objects the JSON encoder cannot handle natively are rendered via ``str``.
    """
    compact_separators = (",", ":")
    return json.dumps(
        value,
        default=str,
        separators=compact_separators,
        sort_keys=True,
    )
|
||||
|
||||
|
||||
def bundle_digest(value: Any) -> str:
    """Return the content digest of *value*'s stable JSON serialization.

    Because the serialization sorts keys, dicts that differ only in key
    order produce the same digest.
    """
    serialized = stable_json_dumps(value)
    return content_digest(serialized)
|
||||
|
||||
|
||||
class ArtifactType(str, Enum):
    """String-valued enumeration of artifact kinds.

    Members are also ``str`` instances, so each one compares equal to its
    raw value (for example ``ArtifactType.CONTENT == "content"``).
    """

    CONTENT = "content"
    TEMPLATE = "template"
    GENERATED = "generated"
    DATASET = "dataset"
    DOCUMENT = "document"
    CONFIG = "config"
    SCHEMA = "schema"
    RUN_OUTPUT = "run_output"
|
||||
|
||||
|
||||
class RelationshipType(str, Enum):
    """String-valued enumeration of relationship categories.

    Members are also ``str`` instances, so each one compares equal to its
    raw value (for example ``RelationshipType.CUSTOM == "custom"``).
    """

    RELATES_TO = "relates_to"
    DEPENDS_ON = "depends_on"
    PRODUCES = "produces"
    DERIVED_FROM = "derived_from"
    EVALUATES = "evaluates"
    CONTAINS = "contains"
    CUSTOM = "custom"
|
||||
|
||||
|
||||
@dataclass
class ArtifactMetadata:
    """Optional descriptive fields attached to an artifact.

    Every field has a default, so ``ArtifactMetadata()`` is a valid, empty
    instance.
    """

    description: str | None = None
    tags: list[str] = field(default_factory=list)
    media_type: str | None = None
    source_uri: str | None = None
    version: str | None = None
    custom: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return all fields as a plain dict.

        ``dataclasses.asdict`` copies the ``tags`` list and ``custom`` dict,
        so the result does not alias this instance's containers.
        """
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "ArtifactMetadata":
        """Build an instance from a mapping, tolerating missing or null values.

        Args:
            data: Mapping with any subset of the field names as keys, or
                ``None`` / an empty mapping for an all-default instance.

        Returns:
            A new ``ArtifactMetadata`` holding copies of the ``tags`` and
            ``custom`` containers.
        """
        data = data or {}
        return cls(
            description=data.get("description"),
            # ``or`` instead of a ``get`` default: a key that is present but
            # null must not reach ``list()`` / ``dict()``, which would raise
            # TypeError on ``None``.
            tags=list(data.get("tags") or []),
            media_type=data.get("media_type"),
            source_uri=data.get("source_uri"),
            version=data.get("version"),
            custom=dict(data.get("custom") or {}),
        )
|
||||
|
||||
|
||||
@dataclass
class Collection:
    """A named container that artifacts point at through their collection id.

    ``parent_id`` presumably refers to an enclosing collection; nothing in
    this class enforces or resolves it.
    """

    id: str
    name: str
    domain: str | None = None
    parent_id: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
    created_at: datetime = field(default_factory=utc_now)
    updated_at: datetime = field(default_factory=utc_now)

    @classmethod
    def create(
        cls,
        name: str,
        *,
        domain: str | None = None,
        parent_id: str | None = None,
        collection_id: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> "Collection":
        """Create a new collection, generating an id when none is supplied.

        Args:
            name: Collection name.
            domain: Optional domain label.
            parent_id: Optional id of a parent collection.
            collection_id: Explicit id; a random UUID4 string is used when
                this is ``None`` or empty.
            metadata: Optional free-form metadata; it is copied so later
                changes to the caller's dict do not leak into the collection.

        Returns:
            A ``Collection`` whose ``created_at`` and ``updated_at`` are the
            same instant.
        """
        # One clock read shared by both timestamps; relying on the two field
        # default factories would produce values a few microseconds apart.
        now = utc_now()
        return cls(
            id=collection_id or str(uuid.uuid4()),
            name=name,
            domain=domain,
            parent_id=parent_id,
            metadata=dict(metadata) if metadata else {},
            created_at=now,
            updated_at=now,
        )

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form with timestamps as ISO 8601 strings."""
        return {
            "id": self.id,
            "name": self.name,
            "domain": self.domain,
            "parent_id": self.parent_id,
            "metadata": dict(self.metadata),
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Collection":
        """Rebuild a collection from the mapping produced by ``to_dict``.

        Args:
            data: Mapping with required keys ``id``, ``name``, ``created_at``
                and ``updated_at`` (ISO 8601 strings); the remaining keys are
                optional.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If a timestamp is not a valid ISO 8601 string.
        """
        return cls(
            id=data["id"],
            name=data["name"],
            domain=data.get("domain"),
            parent_id=data.get("parent_id"),
            # ``or {}`` so an explicit null does not reach ``dict()``, which
            # would raise TypeError on ``None``.
            metadata=dict(data.get("metadata") or {}),
            created_at=datetime.fromisoformat(data["created_at"]),
            updated_at=datetime.fromisoformat(data["updated_at"]),
        )
|
||||
|
||||
|
||||
@dataclass
class Artifact:
    """A named piece of text content belonging to a collection.

    ``content_digest`` and ``content_size`` describe ``content``: ``create``
    and ``update_content`` set them to the digest and the UTF-8 byte length
    of the text.
    """

    id: str
    collection_id: str
    name: str
    artifact_type: ArtifactType
    content_digest: str
    content_size: int
    content: str = ""
    metadata: ArtifactMetadata = field(default_factory=ArtifactMetadata)
    created_at: datetime = field(default_factory=utc_now)
    updated_at: datetime = field(default_factory=utc_now)

    @classmethod
    def create(
        cls,
        collection_id: str,
        name: str,
        content: str,
        *,
        artifact_type: ArtifactType = ArtifactType.CONTENT,
        metadata: ArtifactMetadata | None = None,
        artifact_id: str | None = None,
    ) -> "Artifact":
        """Create a new artifact and derive its digest and size from *content*.

        Args:
            collection_id: Id of the owning collection.
            name: Artifact name.
            content: Text content.
            artifact_type: Kind of artifact; defaults to ``CONTENT``.
            metadata: Optional metadata; a fresh empty instance is used when
                omitted.
            artifact_id: Explicit id; a random UUID4 string is used when this
                is ``None`` or empty.

        Returns:
            An ``Artifact`` whose ``created_at`` and ``updated_at`` are the
            same instant.
        """
        # Encode once and reuse the bytes for both the digest and the size;
        # hashing the UTF-8 bytes gives the same digest as hashing the text.
        encoded = content.encode("utf-8")
        # One clock read shared by both timestamps; relying on the two field
        # default factories would produce values a few microseconds apart.
        now = utc_now()
        return cls(
            id=artifact_id or str(uuid.uuid4()),
            collection_id=collection_id,
            name=name,
            artifact_type=artifact_type,
            content_digest=content_digest(encoded),
            content_size=len(encoded),
            content=content,
            metadata=metadata or ArtifactMetadata(),
            created_at=now,
            updated_at=now,
        )

    def update_content(self, content: str) -> None:
        """Replace the content and refresh digest, size, and ``updated_at``."""
        encoded = content.encode("utf-8")
        self.content = content
        self.content_digest = content_digest(encoded)
        self.content_size = len(encoded)
        self.updated_at = utc_now()

    def has_changed(self, digest: str) -> bool:
        """Return ``True`` when *digest* differs from the stored digest."""
        return self.content_digest != digest

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form with timestamps as ISO 8601 strings."""
        return {
            "id": self.id,
            "collection_id": self.collection_id,
            "name": self.name,
            "artifact_type": self.artifact_type.value,
            "content_digest": self.content_digest,
            "content_size": self.content_size,
            "content": self.content,
            "metadata": self.metadata.to_dict(),
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Artifact":
        """Rebuild an artifact from the mapping produced by ``to_dict``.

        The stored ``content_digest`` and ``content_size`` are taken as given;
        they are not recomputed from ``content``.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``artifact_type`` is not a known value or a
                timestamp is not a valid ISO 8601 string.
        """
        return cls(
            id=data["id"],
            collection_id=data["collection_id"],
            name=data["name"],
            artifact_type=ArtifactType(data["artifact_type"]),
            content_digest=data["content_digest"],
            content_size=int(data["content_size"]),
            content=data.get("content", ""),
            metadata=ArtifactMetadata.from_dict(data.get("metadata")),
            created_at=datetime.fromisoformat(data["created_at"]),
            updated_at=datetime.fromisoformat(data["updated_at"]),
        )
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ArtifactReference:
    """Immutable artifact pointer in ``[collection_id:]name[@version]`` form."""

    name: str
    collection_id: str | None = None
    version: str | None = None

    @classmethod
    def parse(cls, value: str) -> "ArtifactReference":
        """Parse a ``[collection_id:]name[@version]`` string.

        The version is whatever follows the last ``@``; the collection id is
        whatever precedes the first ``:`` of the remainder. An empty
        collection id or version is normalized to ``None`` so that
        ``parse(str(ref)) == ref`` holds for every parsed reference.

        Args:
            value: Reference text.

        Returns:
            The parsed reference.

        Raises:
            ValueError: If *value* is empty or has no name part.
        """
        if not value:
            raise ValueError("Artifact reference cannot be empty")

        # rpartition returns ("", "", value) when there is no "@", so the
        # separator slot tells the two cases apart.
        ref, at_sign, version = value.rpartition("@")
        if not at_sign:
            ref, version = value, None

        collection_id, colon, name = ref.partition(":")
        if not colon:
            collection_id, name = None, ref

        if not name:
            raise ValueError(f"Invalid artifact reference: {value}")
        return cls(
            name=name,
            collection_id=collection_id or None,
            version=version or None,
        )

    def __str__(self) -> str:
        """Render the reference in the form accepted by ``parse``."""
        ref = f"{self.collection_id}:{self.name}" if self.collection_id else self.name
        return f"{ref}@{self.version}" if self.version else ref
|
||||
|
||||
|
||||
@dataclass
class Relationship:
    """A directed, labelled link from one artifact id to another."""

    id: str
    source_artifact_id: str
    target_artifact_id: str
    predicate: str
    relationship_type: RelationshipType = RelationshipType.RELATES_TO
    evidence: str = ""
    provenance: dict[str, Any] = field(default_factory=dict)
    created_at: datetime = field(default_factory=utc_now)

    @classmethod
    def create(
        cls,
        source_artifact_id: str,
        target_artifact_id: str,
        predicate: str,
        *,
        relationship_type: RelationshipType = RelationshipType.RELATES_TO,
        evidence: str = "",
        provenance: dict[str, Any] | None = None,
        relationship_id: str | None = None,
    ) -> "Relationship":
        """Create a new relationship, generating an id when none is supplied.

        Args:
            source_artifact_id: Id of the artifact the edge starts from.
            target_artifact_id: Id of the artifact the edge points to.
            predicate: Label for the edge.
            relationship_type: Category; defaults to ``RELATES_TO``.
            evidence: Optional supporting text.
            provenance: Optional free-form provenance; it is copied so later
                changes to the caller's dict do not leak into the relationship.
            relationship_id: Explicit id; a random UUID4 string is used when
                this is ``None`` or empty.
        """
        return cls(
            id=relationship_id or str(uuid.uuid4()),
            source_artifact_id=source_artifact_id,
            target_artifact_id=target_artifact_id,
            predicate=predicate,
            relationship_type=relationship_type,
            evidence=evidence,
            provenance=dict(provenance) if provenance else {},
        )

    def edge(self) -> tuple[str, str, str]:
        """Return ``(source_artifact_id, target_artifact_id, predicate)``."""
        return (self.source_artifact_id, self.target_artifact_id, self.predicate)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form with ``created_at`` as an ISO 8601 string."""
        return {
            "id": self.id,
            "source_artifact_id": self.source_artifact_id,
            "target_artifact_id": self.target_artifact_id,
            "predicate": self.predicate,
            "relationship_type": self.relationship_type.value,
            "evidence": self.evidence,
            "provenance": dict(self.provenance),
            "created_at": self.created_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Relationship":
        """Rebuild a relationship from the mapping produced by ``to_dict``.

        The optional keys ``relationship_type``, ``evidence`` and
        ``provenance`` fall back to their defaults when missing or null.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``relationship_type`` is not a known value or
                ``created_at`` is not a valid ISO 8601 string.
        """
        return cls(
            id=data["id"],
            source_artifact_id=data["source_artifact_id"],
            target_artifact_id=data["target_artifact_id"],
            predicate=data["predicate"],
            # ``or`` instead of ``get`` defaults: a key that is present but
            # null must behave like a missing key rather than raise.
            relationship_type=RelationshipType(
                data.get("relationship_type") or RelationshipType.RELATES_TO
            ),
            evidence=data.get("evidence") or "",
            provenance=dict(data.get("provenance") or {}),
            created_at=datetime.fromisoformat(data["created_at"]),
        )
|
||||
|
||||
Reference in New Issue
Block a user