generated from coulomb/repo-seed
296 lines
9.1 KiB
Python
296 lines
9.1 KiB
Python
"""Core artifact, collection, and relationship models."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import json
|
|
import uuid
|
|
from dataclasses import asdict, dataclass, field
|
|
from datetime import datetime, timezone
|
|
from enum import Enum
|
|
from typing import Any
|
|
|
|
|
|
def utc_now() -> datetime:
    """Return the current time as a timezone-aware ``datetime`` in UTC."""
    return datetime.now(tz=timezone.utc)
|
|
|
|
|
|
def content_digest(content: str | bytes) -> str:
    """Compute a SHA-256 digest of *content*, tagged with its algorithm.

    Text is encoded as UTF-8 before hashing; anything else is passed to
    ``hashlib`` as-is. The result has the form ``"sha256:<hex digest>"``.
    """
    payload = content.encode("utf-8") if isinstance(content, str) else content
    hex_digest = hashlib.sha256(payload).hexdigest()
    return f"sha256:{hex_digest}"
|
|
|
|
|
|
def stable_json_dumps(value: Any) -> str:
    """Serialise *value* to compact JSON with keys in sorted order.

    Objects that ``json`` cannot encode natively are converted with ``str``,
    so the output for such objects is only as stable as their ``str()`` form.
    """
    options = {
        "sort_keys": True,
        "separators": (",", ":"),  # no whitespace after "," or ":"
        "default": str,
    }
    return json.dumps(value, **options)
|
|
|
|
|
|
def bundle_digest(value: Any) -> str:
    """Return the content digest of *value*'s key-sorted JSON serialisation."""
    canonical = stable_json_dumps(value)
    return content_digest(canonical)
|
|
|
|
|
|
class ArtifactType(str, Enum):
    """Closed set of artifact kinds.

    Members subclass ``str``, so each one compares equal to its own value
    (for example ``ArtifactType.CONFIG == "config"``).
    """

    CONTENT = "content"
    TEMPLATE = "template"
    GENERATED = "generated"
    DATASET = "dataset"
    DOCUMENT = "document"
    CONFIG = "config"
    SCHEMA = "schema"
    RUN_OUTPUT = "run_output"
|
|
|
|
|
|
class RelationshipType(str, Enum):
    """Closed set of relationship kinds.

    Members subclass ``str``, so each one compares equal to its own value
    (for example ``RelationshipType.CONTAINS == "contains"``).
    """

    RELATES_TO = "relates_to"
    DEPENDS_ON = "depends_on"
    PRODUCES = "produces"
    DERIVED_FROM = "derived_from"
    EVALUATES = "evaluates"
    CONTAINS = "contains"
    CUSTOM = "custom"
|
|
|
|
|
|
@dataclass
class ArtifactMetadata:
    """Optional descriptive metadata carried by an artifact.

    Every field has a default, so ``ArtifactMetadata()`` is a valid empty
    record.
    """

    description: str | None = None
    tags: list[str] = field(default_factory=list)
    media_type: str | None = None
    source_uri: str | None = None
    version: str | None = None
    custom: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain dict (``asdict`` copies nested containers)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "ArtifactMetadata":
        """Build metadata from a mapping, tolerating missing or null entries.

        Args:
            data: Mapping with any subset of the field names as keys, or
                ``None``/empty for an all-defaults record.

        Returns:
            A new instance owning its own ``tags`` list and ``custom`` dict.
        """
        data = data or {}

        # An explicit null (e.g. JSON ``"tags": null``) is treated the same
        # as a missing key instead of crashing inside ``list(None)``.
        raw_tags = data.get("tags") or []
        if isinstance(raw_tags, str):
            # ``list("abc")`` would split the string into characters; a lone
            # string is taken to be a single tag.
            raw_tags = [raw_tags]

        return cls(
            description=data.get("description"),
            tags=list(raw_tags),
            media_type=data.get("media_type"),
            source_uri=data.get("source_uri"),
            version=data.get("version"),
            custom=dict(data.get("custom") or {}),
        )
|
|
|
|
|
|
@dataclass
class Collection:
    """A named collection record, optionally nested under a parent collection.

    ``created_at`` and ``updated_at`` default to the time of construction.
    """

    id: str
    name: str
    domain: str | None = None
    parent_id: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
    created_at: datetime = field(default_factory=utc_now)
    updated_at: datetime = field(default_factory=utc_now)

    @classmethod
    def create(
        cls,
        name: str,
        *,
        domain: str | None = None,
        parent_id: str | None = None,
        collection_id: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> "Collection":
        """Create a new collection, generating a UUID4 id when none is given.

        Args:
            name: Collection name.
            domain: Optional domain label.
            parent_id: Optional id of the parent collection.
            collection_id: Explicit id; a falsy value triggers generation.
            metadata: Optional mapping; it is copied, so later changes to the
                caller's dict do not leak into the collection.

        Returns:
            The new collection, with identical ``created_at``/``updated_at``.
        """
        # One timestamp for both fields: the per-field default factories
        # would each read the clock and produce slightly different values.
        now = utc_now()
        return cls(
            id=collection_id or str(uuid.uuid4()),
            name=name,
            domain=domain,
            parent_id=parent_id,
            metadata=dict(metadata or {}),
            created_at=now,
            updated_at=now,
        )

    def to_dict(self) -> dict[str, Any]:
        """Return a dict form with ISO-8601 timestamps and a copied metadata dict."""
        return {
            "id": self.id,
            "name": self.name,
            "domain": self.domain,
            "parent_id": self.parent_id,
            "metadata": dict(self.metadata),
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Collection":
        """Rebuild a collection from the mapping produced by ``to_dict``.

        Args:
            data: Mapping with required keys ``id``, ``name``, ``created_at``
                and ``updated_at`` (ISO-8601 strings); other keys are optional.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If a timestamp is not a valid ISO-8601 string.
        """
        return cls(
            id=data["id"],
            name=data["name"],
            domain=data.get("domain"),
            parent_id=data.get("parent_id"),
            # ``or {}`` also covers an explicit null, which ``dict(None)``
            # would reject.
            metadata=dict(data.get("metadata") or {}),
            created_at=datetime.fromisoformat(data["created_at"]),
            updated_at=datetime.fromisoformat(data["updated_at"]),
        )
|
|
|
|
|
|
@dataclass
class Artifact:
    """A piece of text content stored in a collection, with digest and size.

    ``content_digest`` and ``content_size`` are derived from ``content`` by
    ``create`` and ``update_content``; the plain constructor and ``from_dict``
    store whatever values they are given without re-checking them.
    """

    id: str
    collection_id: str
    name: str
    artifact_type: ArtifactType
    content_digest: str
    content_size: int
    content: str = ""
    metadata: ArtifactMetadata = field(default_factory=ArtifactMetadata)
    created_at: datetime = field(default_factory=utc_now)
    updated_at: datetime = field(default_factory=utc_now)

    @classmethod
    def create(
        cls,
        collection_id: str,
        name: str,
        content: str,
        *,
        artifact_type: ArtifactType = ArtifactType.CONTENT,
        metadata: ArtifactMetadata | None = None,
        artifact_id: str | None = None,
    ) -> "Artifact":
        """Create an artifact from text, computing its digest and byte size.

        Args:
            collection_id: Id of the owning collection.
            name: Artifact name.
            content: Text content.
            artifact_type: Kind of artifact; defaults to ``CONTENT``.
            metadata: Optional metadata; a fresh empty record is used if omitted.
            artifact_id: Explicit id; a falsy value triggers UUID4 generation.

        Returns:
            The new artifact, with identical ``created_at``/``updated_at``.
        """
        # Encode once and reuse the bytes for both the digest and the size.
        raw = content.encode("utf-8")
        # One timestamp for both fields: the per-field default factories
        # would each read the clock and produce slightly different values.
        now = utc_now()
        return cls(
            id=artifact_id or str(uuid.uuid4()),
            collection_id=collection_id,
            name=name,
            artifact_type=artifact_type,
            content_digest=content_digest(raw),
            content_size=len(raw),
            content=content,
            metadata=metadata or ArtifactMetadata(),
            created_at=now,
            updated_at=now,
        )

    def update_content(self, content: str) -> None:
        """Replace the content and refresh digest, size and ``updated_at``."""
        # Encode before touching any attribute so a failing encode leaves the
        # artifact unchanged rather than half-updated.
        raw = content.encode("utf-8")
        self.content = content
        self.content_digest = content_digest(raw)
        self.content_size = len(raw)
        self.updated_at = utc_now()

    def has_changed(self, digest: str) -> bool:
        """Return ``True`` when *digest* differs from the stored content digest."""
        return self.content_digest != digest

    def to_dict(self) -> dict[str, Any]:
        """Return a dict form with the enum value, nested metadata dict and ISO timestamps."""
        return {
            "id": self.id,
            "collection_id": self.collection_id,
            "name": self.name,
            "artifact_type": self.artifact_type.value,
            "content_digest": self.content_digest,
            "content_size": self.content_size,
            "content": self.content,
            "metadata": self.metadata.to_dict(),
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Artifact":
        """Rebuild an artifact from the mapping produced by ``to_dict``.

        The stored digest and size are taken as-is; they are not recomputed
        from ``content``.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``artifact_type`` is not a known value or a
                timestamp is not a valid ISO-8601 string.
        """
        return cls(
            id=data["id"],
            collection_id=data["collection_id"],
            name=data["name"],
            artifact_type=ArtifactType(data["artifact_type"]),
            content_digest=data["content_digest"],
            content_size=int(data["content_size"]),
            content=data.get("content", ""),
            metadata=ArtifactMetadata.from_dict(data.get("metadata")),
            created_at=datetime.fromisoformat(data["created_at"]),
            updated_at=datetime.fromisoformat(data["updated_at"]),
        )
|
|
|
|
|
|
@dataclass(frozen=True)
class ArtifactReference:
    """Immutable reference to an artifact, written ``[collection:]name[@version]``."""

    name: str
    collection_id: str | None = None
    version: str | None = None

    @classmethod
    def parse(cls, value: str) -> "ArtifactReference":
        """Parse a ``[collection:]name[@version]`` string.

        The version is split off at the last ``@`` and the collection at the
        first ``:``. An empty collection or version part (``":name"``,
        ``"name@"``) is normalised to ``None``, so references that render
        identically also compare equal.

        Args:
            value: The reference text.

        Returns:
            The parsed reference.

        Raises:
            ValueError: If *value* is empty or has no name part.
        """
        if not value:
            raise ValueError("Artifact reference cannot be empty")

        ref, at_sign, version = value.rpartition("@")
        if not at_sign:
            # No "@": rpartition put the whole string in the last slot.
            ref, version = value, None

        collection_id, colon, name = ref.partition(":")
        if not colon:
            # No ":": partition put the whole string in the first slot.
            collection_id, name = None, ref

        if not name:
            raise ValueError(f"Invalid artifact reference: {value}")

        return cls(
            name=name,
            collection_id=collection_id or None,
            version=version or None,
        )

    def __str__(self) -> str:
        """Render the reference, omitting the collection and version when unset."""
        ref = f"{self.collection_id}:{self.name}" if self.collection_id else self.name
        return f"{ref}@{self.version}" if self.version else ref
|
|
|
|
|
|
@dataclass
class Relationship:
    """A directed, labelled link from a source artifact to a target artifact."""

    id: str
    source_artifact_id: str
    target_artifact_id: str
    predicate: str
    relationship_type: RelationshipType = RelationshipType.RELATES_TO
    evidence: str = ""
    provenance: dict[str, Any] = field(default_factory=dict)
    created_at: datetime = field(default_factory=utc_now)

    @classmethod
    def create(
        cls,
        source_artifact_id: str,
        target_artifact_id: str,
        predicate: str,
        *,
        relationship_type: RelationshipType = RelationshipType.RELATES_TO,
        evidence: str = "",
        provenance: dict[str, Any] | None = None,
        relationship_id: str | None = None,
    ) -> "Relationship":
        """Create a relationship, generating a UUID4 id when none is given.

        Args:
            source_artifact_id: Id of the artifact the edge starts from.
            target_artifact_id: Id of the artifact the edge points to.
            predicate: Label for the edge.
            relationship_type: Kind of relationship; defaults to ``RELATES_TO``.
            evidence: Optional supporting text.
            provenance: Optional mapping; it is copied, so later changes to
                the caller's dict do not leak into the relationship.
            relationship_id: Explicit id; a falsy value triggers generation.
        """
        return cls(
            id=relationship_id or str(uuid.uuid4()),
            source_artifact_id=source_artifact_id,
            target_artifact_id=target_artifact_id,
            predicate=predicate,
            relationship_type=relationship_type,
            evidence=evidence,
            provenance=dict(provenance or {}),
        )

    def edge(self) -> tuple[str, str, str]:
        """Return the ``(source id, target id, predicate)`` triple."""
        return (self.source_artifact_id, self.target_artifact_id, self.predicate)

    def to_dict(self) -> dict[str, Any]:
        """Return a dict form with the enum value, a copied provenance dict and an ISO timestamp."""
        return {
            "id": self.id,
            "source_artifact_id": self.source_artifact_id,
            "target_artifact_id": self.target_artifact_id,
            "predicate": self.predicate,
            "relationship_type": self.relationship_type.value,
            "evidence": self.evidence,
            "provenance": dict(self.provenance),
            "created_at": self.created_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Relationship":
        """Rebuild a relationship from the mapping produced by ``to_dict``.

        A missing or null ``relationship_type`` falls back to ``RELATES_TO``,
        and a missing or null ``provenance`` becomes an empty dict.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``relationship_type`` is not a known value or
                ``created_at`` is not a valid ISO-8601 string.
        """
        # Passing an enum member to the enum constructor returns that member,
        # so the fallback needs no duplicated string literal.
        raw_type = data.get("relationship_type") or RelationshipType.RELATES_TO
        return cls(
            id=data["id"],
            source_artifact_id=data["source_artifact_id"],
            target_artifact_id=data["target_artifact_id"],
            predicate=data["predicate"],
            relationship_type=RelationshipType(raw_type),
            evidence=data.get("evidence", ""),
            provenance=dict(data.get("provenance") or {}),
            created_at=datetime.fromisoformat(data["created_at"]),
        )
|
|
|