transformation registry, transformation runs, and derived artifact lineage

This commit is contained in:
2026-05-06 18:05:44 +02:00
parent 27c068f9ac
commit 43c06d6024
14 changed files with 1695 additions and 8 deletions

View File

@@ -41,6 +41,7 @@ from .relationships import (
RelationshipTargetKind,
)
from .retrieval_feedback import RetrievalFeedbackLabel, RetrievalFeedbackRecord
from .transformations import TransformationOperation, TransformationRun, TransformationRunStatus
__all__ = [
"Actor",
@@ -82,6 +83,9 @@ __all__ = [
"Sensitivity",
"SourceReference",
"SourcePayload",
"TransformationOperation",
"TransformationRun",
"TransformationRunStatus",
"VersionChangeType",
"content_digest",
"mapping_digest",

View File

@@ -159,3 +159,18 @@ class DerivedArtifactLineage:
if include_hash:
data["lineage_hash"] = self.lineage_hash
return data
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "DerivedArtifactLineage":
    """Build a lineage record from a plain mapping.

    The identifier keys (``lineage_id``, ``transformation_run_id``,
    ``output_asset_id``, ``output_representation_id``, ``actor_id``) are
    required and raise ``KeyError`` when absent. The two source-id
    sequences and the three mapping fields are optional and default to
    empty. A ``lineage_hash`` entry, if present, is not read.
    """
    # Values are extracted in the same order as the constructor keywords
    # so that the first failing key is reported consistently.
    lineage_id = data["lineage_id"]
    asset_ids = tuple(data.get("source_asset_ids", ()))
    version_ids = tuple(data.get("source_version_ids", ()))
    run_id = data["transformation_run_id"]
    output_asset = data["output_asset_id"]
    output_representation = data["output_representation_id"]
    actor = data["actor_id"]
    # Mapping fields are shallow-copied so the record does not share
    # top-level dict objects with ``data``.
    params = dict(data.get("parameters", {}))
    policy = dict(data.get("policy_context", {}))
    provenance = dict(data.get("adapter_provenance", {}))
    return cls(
        lineage_id=lineage_id,
        source_asset_ids=asset_ids,
        source_version_ids=version_ids,
        transformation_run_id=run_id,
        output_asset_id=output_asset,
        output_representation_id=output_representation,
        actor_id=actor,
        parameters=params,
        policy_context=policy,
        adapter_provenance=provenance,
    )

View File

@@ -0,0 +1,228 @@
"""Transformation operation and run primitives."""
from __future__ import annotations
from dataclasses import dataclass, field, replace
from enum import Enum
from typing import Any
from .primitives import compact_dict, new_id, utc_now
class TransformationRunStatus(str, Enum):
    """Status values a ``TransformationRun`` can carry.

    Each member is also a ``str``, so it compares equal to its lowercase
    wire value and can be rebuilt from that value, e.g.
    ``TransformationRunStatus("queued")``.
    """

    # Default status of a newly constructed TransformationRun.
    QUEUED = "queued"
    # Set by TransformationRun.running().
    RUNNING = "running"
    # Set by TransformationRun.waiting().
    WAITING = "waiting"
    # Set by TransformationRun.completed().
    COMPLETED = "completed"
    # Set by TransformationRun.partially_completed().
    PARTIALLY_COMPLETED = "partially_completed"
    # Set by TransformationRun.failed().
    FAILED = "failed"
    # Set by TransformationRun.retried(); presumably marks a run that has
    # been superseded by a retry attempt -- confirm with callers.
    RETRIED = "retried"
    # Set by TransformationRun.canceled().
    CANCELED = "canceled"
@dataclass(frozen=True)
class TransformationOperation:
    """Immutable description of a transformation that can be registered and run.

    On construction the sequence fields are normalized to tuples and the
    mapping fields (``parameter_schema``, ``metadata``) are shallow-copied,
    so the instance never aliases a mutable container owned by the caller.
    """

    operation_id: str
    name: str
    description: str = ""
    input_spec: tuple[str, ...] = ()
    output_spec: tuple[str, ...] = ()
    parameter_schema: dict[str, Any] = field(default_factory=dict)
    required_permissions: tuple[str, ...] = ()
    # Empty means "no restriction" (see supports_asset_type).
    supported_asset_types: tuple[str, ...] = ()
    adapter_ref: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Normalize container fields after dataclass initialization."""
        # frozen=True forbids normal attribute assignment, so the
        # normalized values are written with object.__setattr__.
        for name in (
            "input_spec",
            "output_spec",
            "required_permissions",
            "supported_asset_types",
        ):
            object.__setattr__(self, name, tuple(getattr(self, name)))
        # Shallow-copy the mappings: without this, a caller mutating the
        # dict it passed in would silently change this frozen instance.
        for name in ("parameter_schema", "metadata"):
            object.__setattr__(self, name, dict(getattr(self, name)))

    def supports_asset_type(self, asset_type: str) -> bool:
        """Return whether this operation accepts ``asset_type``.

        An empty ``supported_asset_types`` accepts every asset type.
        """
        return not self.supported_asset_types or asset_type in self.supported_asset_types

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict with lists in place of tuples.

        The result is passed through ``compact_dict`` from ``.primitives``
        (presumably drops empty/None entries -- confirm against that helper).
        """
        return compact_dict(
            {
                "operation_id": self.operation_id,
                "name": self.name,
                "description": self.description,
                "input_spec": list(self.input_spec),
                "output_spec": list(self.output_spec),
                "parameter_schema": dict(self.parameter_schema),
                "required_permissions": list(self.required_permissions),
                "supported_asset_types": list(self.supported_asset_types),
                "adapter_ref": self.adapter_ref,
                "metadata": dict(self.metadata),
            }
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TransformationOperation":
        """Build an operation from a plain mapping.

        ``operation_id`` and ``name`` are required (``KeyError`` if absent);
        every other key is optional and falls back to the field default.
        """
        return cls(
            operation_id=data["operation_id"],
            name=data["name"],
            description=data.get("description", ""),
            input_spec=tuple(data.get("input_spec", ())),
            output_spec=tuple(data.get("output_spec", ())),
            parameter_schema=dict(data.get("parameter_schema", {})),
            required_permissions=tuple(data.get("required_permissions", ())),
            supported_asset_types=tuple(data.get("supported_asset_types", ())),
            adapter_ref=data.get("adapter_ref"),
            metadata=dict(data.get("metadata", {})),
        )
@dataclass(frozen=True)
class TransformationRun:
    """Immutable snapshot of one execution attempt of a transformation.

    Instances are never mutated: every transition method returns a new
    ``TransformationRun`` built with ``dataclasses.replace``. On construction
    the sequence fields are normalized to tuples, ``parameters`` and
    ``policy_context`` are shallow-copied, and ``status`` is coerced to a
    ``TransformationRunStatus`` member.
    """

    operation_id: str
    source_asset_ids: tuple[str, ...]
    source_version_ids: tuple[str, ...]
    parameters: dict[str, Any]
    actor_id: str
    correlation_id: str
    policy_context: dict[str, Any] = field(default_factory=dict)
    status: TransformationRunStatus = TransformationRunStatus.QUEUED
    output_asset_ids: tuple[str, ...] = ()
    diagnostics: tuple[dict[str, Any], ...] = ()
    # Set by retry() to the run_id of the run being retried.
    retry_of_run_id: str | None = None
    attempt: int = 1
    run_id: str = field(default_factory=lambda: new_id("run"))
    # Timestamps are strings produced by utc_now().isoformat().
    queued_at: str = field(default_factory=lambda: utc_now().isoformat())
    started_at: str | None = None
    completed_at: str | None = None
    updated_at: str = field(default_factory=lambda: utc_now().isoformat())

    def __post_init__(self) -> None:
        """Normalize container fields and the status after initialization.

        Raises:
            ValueError: if ``status`` is not a valid ``TransformationRunStatus``
                member or value.
        """
        # frozen=True forbids normal attribute assignment, so normalized
        # values are written with object.__setattr__.
        for name in (
            "source_asset_ids",
            "source_version_ids",
            "output_asset_ids",
            "diagnostics",
        ):
            object.__setattr__(self, name, tuple(getattr(self, name)))
        # Shallow-copy the mappings so a caller mutating the dict it passed
        # in cannot change this frozen snapshot after the fact.
        for name in ("parameters", "policy_context"):
            object.__setattr__(self, name, dict(getattr(self, name)))
        # The enum subclasses str, so a raw "queued" compares equal to the
        # member and would otherwise go unnoticed until to_dict() fails on
        # ``.value``. Coercing here is a no-op for real members.
        object.__setattr__(self, "status", TransformationRunStatus(self.status))

    def running(self) -> "TransformationRun":
        """Return a copy marked RUNNING.

        ``started_at`` is kept if already set, otherwise stamped with the
        current time.
        """
        now = utc_now().isoformat()
        return replace(
            self,
            status=TransformationRunStatus.RUNNING,
            started_at=self.started_at or now,
            updated_at=now,
        )

    def waiting(self) -> "TransformationRun":
        """Return a copy marked WAITING with a refreshed ``updated_at``."""
        return replace(
            self,
            status=TransformationRunStatus.WAITING,
            updated_at=utc_now().isoformat(),
        )

    def completed(self, *, output_asset_ids: tuple[str, ...] = ()) -> "TransformationRun":
        """Return a copy marked COMPLETED and stamped with ``completed_at``.

        ``output_asset_ids`` replaces (does not extend) the recorded
        outputs; omitting it resets them to empty.
        """
        now = utc_now().isoformat()
        return replace(
            self,
            status=TransformationRunStatus.COMPLETED,
            output_asset_ids=tuple(output_asset_ids),
            completed_at=now,
            updated_at=now,
        )

    def partially_completed(
        self,
        *,
        output_asset_ids: tuple[str, ...] = (),
        diagnostics: tuple[dict[str, Any], ...] = (),
    ) -> "TransformationRun":
        """Return a copy marked PARTIALLY_COMPLETED.

        ``output_asset_ids`` replaces the recorded outputs, while
        ``diagnostics`` is appended to the existing diagnostics.
        """
        now = utc_now().isoformat()
        return replace(
            self,
            status=TransformationRunStatus.PARTIALLY_COMPLETED,
            output_asset_ids=tuple(output_asset_ids),
            diagnostics=self.diagnostics + tuple(diagnostics),
            completed_at=now,
            updated_at=now,
        )

    def failed(self, diagnostics: tuple[dict[str, Any], ...]) -> "TransformationRun":
        """Return a copy marked FAILED with ``diagnostics`` appended."""
        now = utc_now().isoformat()
        return replace(
            self,
            status=TransformationRunStatus.FAILED,
            diagnostics=self.diagnostics + tuple(diagnostics),
            completed_at=now,
            updated_at=now,
        )

    def canceled(self, diagnostics: tuple[dict[str, Any], ...] = ()) -> "TransformationRun":
        """Return a copy marked CANCELED with any ``diagnostics`` appended."""
        now = utc_now().isoformat()
        return replace(
            self,
            status=TransformationRunStatus.CANCELED,
            diagnostics=self.diagnostics + tuple(diagnostics),
            completed_at=now,
            updated_at=now,
        )

    def retried(self) -> "TransformationRun":
        """Return a copy marked RETRIED with a refreshed ``updated_at``."""
        return replace(
            self,
            status=TransformationRunStatus.RETRIED,
            updated_at=utc_now().isoformat(),
        )

    def retry(self, *, actor_id: str, correlation_id: str) -> "TransformationRun":
        """Create the next attempt for this run.

        The new run reuses the operation, sources, parameters and policy
        context, gets default (fresh) ``run_id``, status and timestamps,
        links back through ``retry_of_run_id`` and increments ``attempt``.
        Outputs and diagnostics are not carried over. This run itself is
        not changed; use ``retried()`` to obtain a copy marked RETRIED.
        """
        return TransformationRun(
            operation_id=self.operation_id,
            source_asset_ids=self.source_asset_ids,
            source_version_ids=self.source_version_ids,
            # __post_init__ copies both mappings, so the new run does not
            # share dict objects with this one.
            parameters=self.parameters,
            actor_id=actor_id,
            correlation_id=correlation_id,
            policy_context=self.policy_context,
            retry_of_run_id=self.run_id,
            attempt=self.attempt + 1,
        )

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict with lists in place of tuples.

        ``status`` is emitted as its string value. The result is passed
        through ``compact_dict`` from ``.primitives`` (presumably drops
        empty/None entries -- confirm against that helper).
        """
        return compact_dict(
            {
                "run_id": self.run_id,
                "operation_id": self.operation_id,
                "source_asset_ids": list(self.source_asset_ids),
                "source_version_ids": list(self.source_version_ids),
                "parameters": dict(self.parameters),
                "actor_id": self.actor_id,
                "correlation_id": self.correlation_id,
                "policy_context": dict(self.policy_context),
                "status": self.status.value,
                "output_asset_ids": list(self.output_asset_ids),
                "diagnostics": list(self.diagnostics),
                "retry_of_run_id": self.retry_of_run_id,
                "attempt": self.attempt,
                "queued_at": self.queued_at,
                "started_at": self.started_at,
                "completed_at": self.completed_at,
                "updated_at": self.updated_at,
            }
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TransformationRun":
        """Rebuild a run from a plain mapping.

        ``run_id``, ``operation_id``, ``actor_id``, ``correlation_id``,
        ``queued_at`` and ``updated_at`` are required (``KeyError`` if
        absent). The remaining keys are optional and fall back to the field
        defaults. An unknown ``status`` value raises ``ValueError``.
        """
        return cls(
            run_id=data["run_id"],
            operation_id=data["operation_id"],
            source_asset_ids=tuple(data.get("source_asset_ids", ())),
            source_version_ids=tuple(data.get("source_version_ids", ())),
            parameters=dict(data.get("parameters", {})),
            actor_id=data["actor_id"],
            correlation_id=data["correlation_id"],
            policy_context=dict(data.get("policy_context", {})),
            status=TransformationRunStatus(data.get("status", TransformationRunStatus.QUEUED.value)),
            output_asset_ids=tuple(data.get("output_asset_ids", ())),
            diagnostics=tuple(data.get("diagnostics", ())),
            retry_of_run_id=data.get("retry_of_run_id"),
            attempt=int(data.get("attempt", 1)),
            queued_at=data["queued_at"],
            started_at=data.get("started_at"),
            completed_at=data.get("completed_at"),
            updated_at=data["updated_at"],
        )