Implemented durable workflow/job foundation

This commit is contained in:
2026-05-06 18:32:10 +02:00
parent 43c06d6024
commit 3b5f96e159
12 changed files with 2091 additions and 9 deletions

View File

@@ -58,6 +58,14 @@ from .core import (
TransformationRun,
TransformationRunStatus,
VersionChangeType,
WorkflowInputDefinition,
WorkflowInputKind,
WorkflowRun,
WorkflowRunStatus,
WorkflowStepDefinition,
WorkflowStepRun,
WorkflowStepRunStatus,
WorkflowTemplate,
)
from .errors import (
AdapterUnavailableError,
@@ -109,6 +117,9 @@ from .services import (
TransformationRequest,
TransformationRunResult,
TransformationService,
WorkflowInvocation,
WorkflowRunResult,
WorkflowService,
default_transformation_registry,
)
from .storage import InMemoryKnowledgeRepository
@@ -232,7 +243,18 @@ __all__ = [
"TransformationService",
"ValidationError",
"VersionChangeType",
"WorkflowInputDefinition",
"WorkflowInputKind",
"WorkflowInvocation",
"WorkflowRun",
"WorkflowRunResult",
"WorkflowRunStatus",
"WorkflowService",
"WorkflowStep",
"WorkflowStepDefinition",
"WorkflowStepRun",
"WorkflowStepRunStatus",
"WorkflowTemplate",
"bundle_digest",
"content_digest",
"default_transformation_registry",

View File

@@ -26,6 +26,9 @@ from kontextual_engine.core import (
Sensitivity,
TransformationRun,
TransformationRunStatus,
WorkflowRun,
WorkflowRunStatus,
WorkflowTemplate,
)
from kontextual_engine.errors import NotFoundError, ValidationError
@@ -47,6 +50,8 @@ class InMemoryAssetRegistryRepository:
ingestion_jobs: dict[str, IngestionJob] = field(default_factory=dict)
transformation_runs: dict[str, TransformationRun] = field(default_factory=dict)
derived_lineage: dict[str, DerivedArtifactLineage] = field(default_factory=dict)
workflow_templates: dict[tuple[str, str], WorkflowTemplate] = field(default_factory=dict)
workflow_runs: dict[str, WorkflowRun] = field(default_factory=dict)
def save_actor(self, actor: Actor) -> Actor:
self.actors[actor.id] = actor
@@ -357,6 +362,64 @@ class InMemoryAssetRegistryRepository:
records = [record for record in records if record.transformation_run_id == transformation_run_id]
return sorted(records, key=lambda record: (record.transformation_run_id, record.lineage_id))
def save_workflow_template(self, template: WorkflowTemplate) -> WorkflowTemplate:
self.workflow_templates[(template.template_id, template.version)] = template
return template
def get_workflow_template(
self,
template_id: str,
*,
version: str | None = None,
) -> WorkflowTemplate:
if version is not None:
try:
return self.workflow_templates[(template_id, version)]
except KeyError as exc:
raise NotFoundError(
"Workflow template not found",
details={"template_id": template_id, "version": version},
) from exc
candidates = [item for key, item in self.workflow_templates.items() if key[0] == template_id]
if not candidates:
raise NotFoundError("Workflow template not found", details={"template_id": template_id})
return sorted(candidates, key=lambda item: (item.updated_at, item.version))[-1]
def list_workflow_templates(
self,
*,
template_id: str | None = None,
) -> list[WorkflowTemplate]:
templates: Iterable[WorkflowTemplate] = self.workflow_templates.values()
if template_id is not None:
templates = [item for item in templates if item.template_id == template_id]
return sorted(templates, key=lambda item: (item.name, item.version, item.template_id))
def save_workflow_run(self, run: WorkflowRun) -> WorkflowRun:
self.get_actor(run.actor_id)
self.get_workflow_template(run.template_id, version=run.template_version)
self.workflow_runs[run.run_id] = run
return run
def get_workflow_run(self, run_id: str) -> WorkflowRun:
try:
return self.workflow_runs[run_id]
except KeyError as exc:
raise NotFoundError("Workflow run not found", details={"run_id": run_id}) from exc
def list_workflow_runs(
self,
*,
status: WorkflowRunStatus | None = None,
template_id: str | None = None,
) -> list[WorkflowRun]:
runs: Iterable[WorkflowRun] = self.workflow_runs.values()
if status is not None:
runs = [run for run in runs if run.status == status]
if template_id is not None:
runs = [run for run in runs if run.template_id == template_id]
return sorted(runs, key=lambda run: (run.queued_at, run.run_id))
def _metadata_matches(
records: list[MetadataRecord],

View File

@@ -29,6 +29,9 @@ from kontextual_engine.core import (
Sensitivity,
TransformationRun,
TransformationRunStatus,
WorkflowRun,
WorkflowRunStatus,
WorkflowTemplate,
)
from kontextual_engine.errors import NotFoundError, ValidationError
@@ -723,6 +726,139 @@ class SQLiteAssetRegistryRepository:
records = [record for record in records if source_asset_id in record.source_asset_ids]
return records
def save_workflow_template(self, template: WorkflowTemplate) -> WorkflowTemplate:
with self._connect() as conn:
conn.execute(
"""
insert into workflow_templates
(id, version, name, updated_at, payload)
values (?, ?, ?, ?, ?)
on conflict(id, version) do update set
name=excluded.name,
updated_at=excluded.updated_at,
payload=excluded.payload
""",
(
template.template_id,
template.version,
template.name,
template.updated_at,
_json(template.to_dict()),
),
)
return template
def get_workflow_template(
self,
template_id: str,
*,
version: str | None = None,
) -> WorkflowTemplate:
if version is None:
row = self._one(
"""
select payload from workflow_templates
where id = ?
order by updated_at desc, version desc
limit 1
""",
(template_id,),
)
else:
row = self._one(
"select payload from workflow_templates where id = ? and version = ?",
(template_id, version),
)
if row is None:
details = {"template_id": template_id}
if version is not None:
details["version"] = version
raise NotFoundError("Workflow template not found", details=details)
return WorkflowTemplate.from_dict(_loads(row["payload"]))
def list_workflow_templates(
self,
*,
template_id: str | None = None,
) -> list[WorkflowTemplate]:
if template_id is None:
rows = self._all("select payload from workflow_templates order by name, version, id", ())
else:
rows = self._all(
"select payload from workflow_templates where id = ? order by name, version, id",
(template_id,),
)
return [WorkflowTemplate.from_dict(_loads(row["payload"])) for row in rows]
def save_workflow_run(self, run: WorkflowRun) -> WorkflowRun:
try:
with self._connect() as conn:
conn.execute(
"""
insert into workflow_runs
(id, template_id, template_version, status, actor_id, correlation_id,
queued_at, updated_at, payload)
values (?, ?, ?, ?, ?, ?, ?, ?, ?)
on conflict(id) do update set
template_id=excluded.template_id,
template_version=excluded.template_version,
status=excluded.status,
actor_id=excluded.actor_id,
correlation_id=excluded.correlation_id,
queued_at=excluded.queued_at,
updated_at=excluded.updated_at,
payload=excluded.payload
""",
(
run.run_id,
run.template_id,
run.template_version,
run.status.value,
run.actor_id,
run.correlation_id,
run.queued_at,
run.updated_at,
_json(run.to_dict()),
),
)
except sqlite3.IntegrityError as exc:
if _is_foreign_key_error(exc):
raise ValidationError(
"Workflow run references an unknown actor or template",
details={
"actor_id": run.actor_id,
"template_id": run.template_id,
"template_version": run.template_version,
"run_id": run.run_id,
},
) from exc
raise
return run
def get_workflow_run(self, run_id: str) -> WorkflowRun:
row = self._one("select payload from workflow_runs where id = ?", (run_id,))
if row is None:
raise NotFoundError("Workflow run not found", details={"run_id": run_id})
return WorkflowRun.from_dict(_loads(row["payload"]))
def list_workflow_runs(
self,
*,
status: WorkflowRunStatus | None = None,
template_id: str | None = None,
) -> list[WorkflowRun]:
clauses = []
params: list[Any] = []
if status is not None:
clauses.append("status = ?")
params.append(status.value)
if template_id is not None:
clauses.append("template_id = ?")
params.append(template_id)
where = f" where {' and '.join(clauses)}" if clauses else ""
rows = self._all(f"select payload from workflow_runs{where} order by queued_at, id", tuple(params))
return [WorkflowRun.from_dict(_loads(row["payload"])) for row in rows]
def _initialize(self) -> None:
with self._connect() as conn:
conn.executescript(
@@ -839,6 +975,28 @@ class SQLiteAssetRegistryRepository:
transformation_run_id text not null references transformation_runs(id) on delete cascade,
payload text not null
);
create table if not exists workflow_templates (
id text not null,
version text not null,
name text not null,
updated_at text not null,
payload text not null,
primary key(id, version)
);
create table if not exists workflow_runs (
id text primary key,
template_id text not null,
template_version text not null,
status text not null,
actor_id text not null,
correlation_id text not null,
queued_at text not null,
updated_at text not null,
payload text not null,
foreign key(actor_id) references actors(id),
foreign key(template_id, template_version)
references workflow_templates(id, version)
);
create index if not exists idx_assets_lifecycle on assets(lifecycle);
create index if not exists idx_representations_asset on representations(asset_id);
create index if not exists idx_metadata_asset on metadata_records(asset_id);
@@ -858,6 +1016,10 @@ class SQLiteAssetRegistryRepository:
create index if not exists idx_transformation_runs_correlation on transformation_runs(correlation_id);
create index if not exists idx_derived_lineage_output on derived_lineage(output_asset_id);
create index if not exists idx_derived_lineage_run on derived_lineage(transformation_run_id);
create index if not exists idx_workflow_templates_name on workflow_templates(name);
create index if not exists idx_workflow_runs_status on workflow_runs(status);
create index if not exists idx_workflow_runs_template on workflow_runs(template_id);
create index if not exists idx_workflow_runs_correlation on workflow_runs(correlation_id);
"""
)

View File

@@ -42,6 +42,16 @@ from .relationships import (
)
from .retrieval_feedback import RetrievalFeedbackLabel, RetrievalFeedbackRecord
from .transformations import TransformationOperation, TransformationRun, TransformationRunStatus
from .workflow_jobs import (
WorkflowInputDefinition,
WorkflowInputKind,
WorkflowRun,
WorkflowRunStatus,
WorkflowStepDefinition,
WorkflowStepRun,
WorkflowStepRunStatus,
WorkflowTemplate,
)
__all__ = [
"Actor",
@@ -87,6 +97,14 @@ __all__ = [
"TransformationRun",
"TransformationRunStatus",
"VersionChangeType",
"WorkflowInputDefinition",
"WorkflowInputKind",
"WorkflowRun",
"WorkflowRunStatus",
"WorkflowStepDefinition",
"WorkflowStepRun",
"WorkflowStepRunStatus",
"WorkflowTemplate",
"content_digest",
"mapping_digest",
"new_id",

View File

@@ -0,0 +1,462 @@
"""Workflow template and durable run primitives."""
from __future__ import annotations
from dataclasses import dataclass, field, replace
from enum import Enum
from typing import Any
from .primitives import compact_dict, mapping_digest, new_id, utc_now
class WorkflowInputKind(str, Enum):
ASSET = "asset"
COLLECTION = "collection"
QUERY = "query"
SOURCE_EVENT = "source_event"
PAYLOAD = "payload"
class WorkflowRunStatus(str, Enum):
QUEUED = "queued"
RUNNING = "running"
WAITING = "waiting"
COMPLETED = "completed"
PARTIALLY_COMPLETED = "partially_completed"
FAILED = "failed"
RETRIED = "retried"
CANCELED = "canceled"
class WorkflowStepRunStatus(str, Enum):
QUEUED = "queued"
RUNNING = "running"
WAITING = "waiting"
COMPLETED = "completed"
FAILED = "failed"
SKIPPED = "skipped"
CANCELED = "canceled"
@dataclass(frozen=True)
class WorkflowInputDefinition:
name: str
kind: WorkflowInputKind | str
required: bool = True
description: str = ""
metadata: dict[str, Any] = field(default_factory=dict)
def __post_init__(self) -> None:
object.__setattr__(self, "kind", WorkflowInputKind(self.kind))
def to_dict(self) -> dict[str, Any]:
return compact_dict(
{
"name": self.name,
"kind": self.kind.value,
"required": self.required,
"description": self.description,
"metadata": dict(self.metadata),
}
)
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "WorkflowInputDefinition":
return cls(
name=data["name"],
kind=WorkflowInputKind(data["kind"]),
required=bool(data.get("required", True)),
description=data.get("description", ""),
metadata=dict(data.get("metadata", {})),
)
@dataclass(frozen=True)
class WorkflowStepDefinition:
step_id: str
kind: str = "transformation"
operation_id: str | None = None
depends_on: tuple[str, ...] = ()
inputs: dict[str, Any] = field(default_factory=dict)
parameters: dict[str, Any] = field(default_factory=dict)
outputs: dict[str, Any] = field(default_factory=dict)
preconditions: tuple[dict[str, Any], ...] = ()
failure_behavior: str = "fail_workflow"
required_permissions: tuple[str, ...] = ()
metadata: dict[str, Any] = field(default_factory=dict)
def __post_init__(self) -> None:
object.__setattr__(self, "depends_on", tuple(self.depends_on))
object.__setattr__(self, "preconditions", tuple(dict(item) for item in self.preconditions))
object.__setattr__(self, "required_permissions", tuple(self.required_permissions))
def to_dict(self) -> dict[str, Any]:
return compact_dict(
{
"step_id": self.step_id,
"kind": self.kind,
"operation_id": self.operation_id,
"depends_on": list(self.depends_on),
"inputs": dict(self.inputs),
"parameters": dict(self.parameters),
"outputs": dict(self.outputs),
"preconditions": list(self.preconditions),
"failure_behavior": self.failure_behavior,
"required_permissions": list(self.required_permissions),
"metadata": dict(self.metadata),
}
)
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "WorkflowStepDefinition":
return cls(
step_id=data["step_id"],
kind=data.get("kind", "transformation"),
operation_id=data.get("operation_id"),
depends_on=tuple(data.get("depends_on", ())),
inputs=dict(data.get("inputs", {})),
parameters=dict(data.get("parameters", {})),
outputs=dict(data.get("outputs", {})),
preconditions=tuple(dict(item) for item in data.get("preconditions", ())),
failure_behavior=data.get("failure_behavior", "fail_workflow"),
required_permissions=tuple(data.get("required_permissions", ())),
metadata=dict(data.get("metadata", {})),
)
@dataclass(frozen=True)
class WorkflowTemplate:
name: str
inputs: tuple[WorkflowInputDefinition, ...] = ()
steps: tuple[WorkflowStepDefinition, ...] = ()
version: str = "1"
description: str = ""
policy_checks: tuple[dict[str, Any], ...] = ()
failure_behavior: str = "fail_workflow"
metadata: dict[str, Any] = field(default_factory=dict)
created_by: str | None = None
template_id: str = field(default_factory=lambda: new_id("wftpl"))
created_at: str = field(default_factory=lambda: utc_now().isoformat())
updated_at: str = field(default_factory=lambda: utc_now().isoformat())
def __post_init__(self) -> None:
object.__setattr__(self, "inputs", tuple(self.inputs))
object.__setattr__(self, "steps", tuple(self.steps))
object.__setattr__(self, "policy_checks", tuple(dict(item) for item in self.policy_checks))
@property
def template_hash(self) -> str:
return mapping_digest(self.to_dict(include_hash=False))
def with_actor(self, actor_id: str) -> "WorkflowTemplate":
return replace(self, created_by=self.created_by or actor_id, updated_at=utc_now().isoformat())
def to_dict(self, *, include_hash: bool = True) -> dict[str, Any]:
data = compact_dict(
{
"template_id": self.template_id,
"name": self.name,
"version": self.version,
"description": self.description,
"inputs": [item.to_dict() for item in self.inputs],
"steps": [item.to_dict() for item in self.steps],
"policy_checks": list(self.policy_checks),
"failure_behavior": self.failure_behavior,
"metadata": dict(self.metadata),
"created_by": self.created_by,
"created_at": self.created_at,
"updated_at": self.updated_at,
}
)
if include_hash:
data["template_hash"] = self.template_hash
return data
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "WorkflowTemplate":
return cls(
template_id=data["template_id"],
name=data["name"],
version=data.get("version", "1"),
description=data.get("description", ""),
inputs=tuple(WorkflowInputDefinition.from_dict(item) for item in data.get("inputs", ())),
steps=tuple(WorkflowStepDefinition.from_dict(item) for item in data.get("steps", ())),
policy_checks=tuple(dict(item) for item in data.get("policy_checks", ())),
failure_behavior=data.get("failure_behavior", "fail_workflow"),
metadata=dict(data.get("metadata", {})),
created_by=data.get("created_by"),
created_at=data["created_at"],
updated_at=data["updated_at"],
)
@dataclass(frozen=True)
class WorkflowStepRun:
step_id: str
operation_id: str | None = None
status: WorkflowStepRunStatus = WorkflowStepRunStatus.QUEUED
transformation_run_id: str | None = None
output_asset_ids: tuple[str, ...] = ()
diagnostics: tuple[dict[str, Any], ...] = ()
started_at: str | None = None
completed_at: str | None = None
updated_at: str = field(default_factory=lambda: utc_now().isoformat())
def __post_init__(self) -> None:
object.__setattr__(self, "status", WorkflowStepRunStatus(self.status))
object.__setattr__(self, "output_asset_ids", tuple(self.output_asset_ids))
object.__setattr__(self, "diagnostics", tuple(self.diagnostics))
def running(self) -> "WorkflowStepRun":
now = utc_now().isoformat()
return replace(
self,
status=WorkflowStepRunStatus.RUNNING,
started_at=self.started_at or now,
updated_at=now,
)
def waiting(self, diagnostics: tuple[dict[str, Any], ...] = ()) -> "WorkflowStepRun":
return replace(
self,
status=WorkflowStepRunStatus.WAITING,
diagnostics=self.diagnostics + tuple(diagnostics),
updated_at=utc_now().isoformat(),
)
def completed(
self,
*,
transformation_run_id: str | None = None,
output_asset_ids: tuple[str, ...] = (),
) -> "WorkflowStepRun":
now = utc_now().isoformat()
return replace(
self,
status=WorkflowStepRunStatus.COMPLETED,
transformation_run_id=transformation_run_id or self.transformation_run_id,
output_asset_ids=tuple(output_asset_ids),
completed_at=now,
updated_at=now,
)
def failed(self, diagnostics: tuple[dict[str, Any], ...]) -> "WorkflowStepRun":
now = utc_now().isoformat()
return replace(
self,
status=WorkflowStepRunStatus.FAILED,
diagnostics=self.diagnostics + tuple(diagnostics),
completed_at=now,
updated_at=now,
)
def skipped(self, diagnostics: tuple[dict[str, Any], ...]) -> "WorkflowStepRun":
now = utc_now().isoformat()
return replace(
self,
status=WorkflowStepRunStatus.SKIPPED,
diagnostics=self.diagnostics + tuple(diagnostics),
completed_at=now,
updated_at=now,
)
def canceled(self, diagnostics: tuple[dict[str, Any], ...] = ()) -> "WorkflowStepRun":
now = utc_now().isoformat()
return replace(
self,
status=WorkflowStepRunStatus.CANCELED,
diagnostics=self.diagnostics + tuple(diagnostics),
completed_at=now,
updated_at=now,
)
def to_dict(self) -> dict[str, Any]:
return compact_dict(
{
"step_id": self.step_id,
"operation_id": self.operation_id,
"status": self.status.value,
"transformation_run_id": self.transformation_run_id,
"output_asset_ids": list(self.output_asset_ids),
"diagnostics": list(self.diagnostics),
"started_at": self.started_at,
"completed_at": self.completed_at,
"updated_at": self.updated_at,
}
)
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "WorkflowStepRun":
return cls(
step_id=data["step_id"],
operation_id=data.get("operation_id"),
status=WorkflowStepRunStatus(data.get("status", WorkflowStepRunStatus.QUEUED.value)),
transformation_run_id=data.get("transformation_run_id"),
output_asset_ids=tuple(data.get("output_asset_ids", ())),
diagnostics=tuple(data.get("diagnostics", ())),
started_at=data.get("started_at"),
completed_at=data.get("completed_at"),
updated_at=data["updated_at"],
)
@dataclass(frozen=True)
class WorkflowRun:
template_id: str
template_version: str
input_bindings: dict[str, Any]
actor_id: str
correlation_id: str
policy_context: dict[str, Any] = field(default_factory=dict)
status: WorkflowRunStatus = WorkflowRunStatus.QUEUED
step_runs: tuple[WorkflowStepRun, ...] = ()
output_asset_ids: tuple[str, ...] = ()
diagnostics: tuple[dict[str, Any], ...] = ()
retry_of_run_id: str | None = None
attempt: int = 1
run_id: str = field(default_factory=lambda: new_id("wfrun"))
queued_at: str = field(default_factory=lambda: utc_now().isoformat())
started_at: str | None = None
completed_at: str | None = None
updated_at: str = field(default_factory=lambda: utc_now().isoformat())
def __post_init__(self) -> None:
object.__setattr__(self, "status", WorkflowRunStatus(self.status))
object.__setattr__(self, "step_runs", tuple(self.step_runs))
object.__setattr__(self, "output_asset_ids", tuple(self.output_asset_ids))
object.__setattr__(self, "diagnostics", tuple(self.diagnostics))
def running(self) -> "WorkflowRun":
now = utc_now().isoformat()
return replace(
self,
status=WorkflowRunStatus.RUNNING,
started_at=self.started_at or now,
updated_at=now,
)
def waiting(self, diagnostics: tuple[dict[str, Any], ...] = ()) -> "WorkflowRun":
return replace(
self,
status=WorkflowRunStatus.WAITING,
diagnostics=self.diagnostics + tuple(diagnostics),
updated_at=utc_now().isoformat(),
)
def completed(self, *, output_asset_ids: tuple[str, ...] = ()) -> "WorkflowRun":
now = utc_now().isoformat()
return replace(
self,
status=WorkflowRunStatus.COMPLETED,
output_asset_ids=tuple(output_asset_ids),
completed_at=now,
updated_at=now,
)
def partially_completed(
self,
*,
output_asset_ids: tuple[str, ...] = (),
diagnostics: tuple[dict[str, Any], ...] = (),
) -> "WorkflowRun":
now = utc_now().isoformat()
return replace(
self,
status=WorkflowRunStatus.PARTIALLY_COMPLETED,
output_asset_ids=tuple(output_asset_ids),
diagnostics=self.diagnostics + tuple(diagnostics),
completed_at=now,
updated_at=now,
)
def failed(self, diagnostics: tuple[dict[str, Any], ...]) -> "WorkflowRun":
now = utc_now().isoformat()
return replace(
self,
status=WorkflowRunStatus.FAILED,
diagnostics=self.diagnostics + tuple(diagnostics),
completed_at=now,
updated_at=now,
)
def canceled(self, diagnostics: tuple[dict[str, Any], ...] = ()) -> "WorkflowRun":
now = utc_now().isoformat()
return replace(
self,
status=WorkflowRunStatus.CANCELED,
diagnostics=self.diagnostics + tuple(diagnostics),
completed_at=now,
updated_at=now,
)
def retried(self) -> "WorkflowRun":
return replace(
self,
status=WorkflowRunStatus.RETRIED,
updated_at=utc_now().isoformat(),
)
def retry(self, *, actor_id: str, correlation_id: str) -> "WorkflowRun":
return WorkflowRun(
template_id=self.template_id,
template_version=self.template_version,
input_bindings=dict(self.input_bindings),
actor_id=actor_id,
correlation_id=correlation_id,
policy_context=dict(self.policy_context),
retry_of_run_id=self.run_id,
attempt=self.attempt + 1,
)
def with_step_run(self, step_run: WorkflowStepRun) -> "WorkflowRun":
existing = {item.step_id: item for item in self.step_runs}
existing[step_run.step_id] = step_run
ordered = tuple(existing[item.step_id] for item in self.step_runs if item.step_id in existing)
if step_run.step_id not in {item.step_id for item in self.step_runs}:
ordered = ordered + (step_run,)
return replace(self, step_runs=ordered, updated_at=utc_now().isoformat())
def to_dict(self) -> dict[str, Any]:
return compact_dict(
{
"run_id": self.run_id,
"template_id": self.template_id,
"template_version": self.template_version,
"input_bindings": dict(self.input_bindings),
"actor_id": self.actor_id,
"correlation_id": self.correlation_id,
"policy_context": dict(self.policy_context),
"status": self.status.value,
"step_runs": [item.to_dict() for item in self.step_runs],
"output_asset_ids": list(self.output_asset_ids),
"diagnostics": list(self.diagnostics),
"retry_of_run_id": self.retry_of_run_id,
"attempt": self.attempt,
"queued_at": self.queued_at,
"started_at": self.started_at,
"completed_at": self.completed_at,
"updated_at": self.updated_at,
}
)
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "WorkflowRun":
return cls(
run_id=data["run_id"],
template_id=data["template_id"],
template_version=data.get("template_version", "1"),
input_bindings=dict(data.get("input_bindings", {})),
actor_id=data["actor_id"],
correlation_id=data["correlation_id"],
policy_context=dict(data.get("policy_context", {})),
status=WorkflowRunStatus(data.get("status", WorkflowRunStatus.QUEUED.value)),
step_runs=tuple(WorkflowStepRun.from_dict(item) for item in data.get("step_runs", ())),
output_asset_ids=tuple(data.get("output_asset_ids", ())),
diagnostics=tuple(data.get("diagnostics", ())),
retry_of_run_id=data.get("retry_of_run_id"),
attempt=int(data.get("attempt", 1)),
queued_at=data["queued_at"],
started_at=data.get("started_at"),
completed_at=data.get("completed_at"),
updated_at=data["updated_at"],
)

View File

@@ -25,6 +25,9 @@ from kontextual_engine.core import (
Sensitivity,
TransformationRun,
TransformationRunStatus,
WorkflowRun,
WorkflowRunStatus,
WorkflowTemplate,
)
@@ -131,3 +134,25 @@ class AssetRegistryRepository(Protocol):
source_asset_id: str | None = None,
transformation_run_id: str | None = None,
) -> list[DerivedArtifactLineage]: ...
def save_workflow_template(self, template: WorkflowTemplate) -> WorkflowTemplate: ...
def get_workflow_template(
self,
template_id: str,
*,
version: str | None = None,
) -> WorkflowTemplate: ...
def list_workflow_templates(
self,
*,
template_id: str | None = None,
) -> list[WorkflowTemplate]: ...
def save_workflow_run(self, run: WorkflowRun) -> WorkflowRun: ...
def get_workflow_run(self, run_id: str) -> WorkflowRun: ...
def list_workflow_runs(
self,
*,
status: WorkflowRunStatus | None = None,
template_id: str | None = None,
) -> list[WorkflowRun]: ...

View File

@@ -32,6 +32,7 @@ from .transformation_service import (
TransformationService,
default_transformation_registry,
)
from .workflow_service import WorkflowInvocation, WorkflowRunResult, WorkflowService
__all__ = [
"AssetChangeResult",
@@ -60,5 +61,8 @@ __all__ = [
"TransformationRequest",
"TransformationRunResult",
"TransformationService",
"WorkflowInvocation",
"WorkflowRunResult",
"WorkflowService",
"default_transformation_registry",
]

View File

@@ -0,0 +1,853 @@
"""Durable workflow templates and MVP job runner."""
from __future__ import annotations
from dataclasses import dataclass, field, replace
from typing import Any
from kontextual_engine.core import (
AuditEvent,
AuditOutcome,
OperationContext,
PolicyDecision,
WorkflowRun,
WorkflowRunStatus,
WorkflowStepDefinition,
WorkflowStepRun,
WorkflowStepRunStatus,
WorkflowTemplate,
)
from kontextual_engine.errors import AuthorizationError, Diagnostic, NotFoundError, ValidationError
from kontextual_engine.ports import AllowAllPolicyGateway, AssetRegistryRepository, PolicyGateway
from .transformation_service import TransformationRequest, TransformationRunResult, TransformationService
@dataclass(frozen=True)
class WorkflowInvocation:
template_id: str
inputs: dict[str, Any] = field(default_factory=dict)
template_version: str | None = None
metadata: dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> dict[str, Any]:
return {
"template_id": self.template_id,
"template_version": self.template_version,
"inputs": dict(self.inputs),
"metadata": dict(self.metadata),
}
@dataclass(frozen=True)
class WorkflowRunResult:
run: WorkflowRun
success: bool
diagnostics: tuple[Diagnostic, ...] = ()
audit_event: AuditEvent | None = None
policy_decision: PolicyDecision | None = None
step_results: tuple[TransformationRunResult, ...] = ()
def __post_init__(self) -> None:
object.__setattr__(self, "diagnostics", tuple(self.diagnostics))
object.__setattr__(self, "step_results", tuple(self.step_results))
def to_dict(self) -> dict[str, Any]:
return {
"success": self.success,
"run": self.run.to_dict(),
"diagnostics": [item.to_dict() for item in self.diagnostics],
"audit_event": self.audit_event.to_dict() if self.audit_event else None,
"policy_decision": self.policy_decision.to_dict() if self.policy_decision else None,
"step_results": [item.to_dict() for item in self.step_results],
}
class WorkflowService:
def __init__(
self,
repository: AssetRegistryRepository,
*,
transformation_service: TransformationService | None = None,
policy_gateway: PolicyGateway | None = None,
) -> None:
self.repository = repository
self.policy_gateway = policy_gateway or AllowAllPolicyGateway()
self.transformation_service = transformation_service or TransformationService(
repository,
policy_gateway=self.policy_gateway,
)
def register_template(self, template: WorkflowTemplate, context: OperationContext) -> WorkflowTemplate:
self.repository.save_actor(context.actor)
diagnostics = _template_diagnostics(template)
if diagnostics:
raise ValidationError(
"Workflow template is invalid",
details={"diagnostics": [item.to_dict() for item in diagnostics]},
)
template = template.with_actor(context.actor.id)
decision = self._authorize(
context,
"workflow.template.register",
f"workflow_template:{template.template_id}",
resource_metadata={"template": template.to_dict()},
)
if not decision.allowed:
self._audit(
"workflow.template.register",
f"workflow_template:{template.template_id}",
AuditOutcome.DENIED,
context,
decision,
)
raise AuthorizationError(
"Operation denied by policy",
details={
"action": "workflow.template.register",
"resource": f"workflow_template:{template.template_id}",
"correlation_id": context.correlation_id,
"policy_decision": decision.to_dict(),
},
)
saved = self.repository.save_workflow_template(template)
self._audit(
"workflow.template.register",
f"workflow_template:{template.template_id}",
AuditOutcome.SUCCESS,
context,
decision,
details={"template_version": template.version, "template_hash": template.template_hash},
)
return saved
def get_template(self, template_id: str, *, version: str | None = None) -> WorkflowTemplate:
return self.repository.get_workflow_template(template_id, version=version)
def list_templates(self, *, template_id: str | None = None) -> tuple[WorkflowTemplate, ...]:
return tuple(self.repository.list_workflow_templates(template_id=template_id))
def queue_template(self, invocation: WorkflowInvocation, context: OperationContext) -> WorkflowRunResult:
template = self.repository.get_workflow_template(
invocation.template_id,
version=invocation.template_version,
)
self.repository.save_actor(context.actor)
decision = self._authorize(
context,
"workflow.run.execute",
f"workflow_template:{template.template_id}",
resource_metadata={"template": template.to_dict(), "invocation": invocation.to_dict()},
)
run = WorkflowRun(
template_id=template.template_id,
template_version=template.version,
input_bindings=dict(invocation.inputs),
actor_id=context.actor.id,
correlation_id=context.correlation_id,
policy_context={"run_execute": decision.to_dict()},
step_runs=_initial_step_runs(template),
)
self.repository.save_workflow_run(run)
self._audit(
"workflow.run.queued",
f"workflow_run:{run.run_id}",
AuditOutcome.SUCCESS,
context,
decision,
details={"template_id": template.template_id, "template_version": template.version},
)
if not decision.allowed:
diagnostic = _permission_diagnostic(decision)
failed = run.failed((_diagnostic_dict(diagnostic),))
self.repository.save_workflow_run(failed)
event = self._audit(
"workflow.run.execute",
f"workflow_run:{run.run_id}",
AuditOutcome.DENIED,
context,
decision,
details={"template_id": template.template_id},
)
return WorkflowRunResult(
run=failed,
success=False,
diagnostics=(diagnostic,),
audit_event=event,
policy_decision=decision,
)
input_diagnostics = _input_diagnostics(template, invocation)
if input_diagnostics:
failed = run.failed(tuple(_diagnostic_dict(item) for item in input_diagnostics))
self.repository.save_workflow_run(failed)
event = self._audit(
"workflow.run.failed",
f"workflow_run:{run.run_id}",
AuditOutcome.FAILED,
context,
decision,
details={"diagnostics": [item.to_dict() for item in input_diagnostics]},
)
return WorkflowRunResult(
run=failed,
success=False,
diagnostics=tuple(input_diagnostics),
audit_event=event,
policy_decision=decision,
)
return WorkflowRunResult(run=run, success=True, policy_decision=decision)
def invoke_template(self, invocation: WorkflowInvocation, context: OperationContext) -> WorkflowRunResult:
queued = self.queue_template(invocation, context)
if not queued.success:
return queued
return self.resume_run(queued.run.run_id, context)
def resume_run(self, run_id: str, context: OperationContext) -> WorkflowRunResult:
run = self.repository.get_workflow_run(run_id)
if run.status in (
WorkflowRunStatus.COMPLETED,
WorkflowRunStatus.CANCELED,
WorkflowRunStatus.RETRIED,
):
diagnostic = Diagnostic(
severity="error",
code="workflow.run_not_resumable",
message="Workflow run cannot be resumed from its current status",
details={"run_id": run_id, "status": run.status.value},
)
return WorkflowRunResult(run=run, success=False, diagnostics=(diagnostic,))
template = self.repository.get_workflow_template(run.template_id, version=run.template_version)
return self._execute_run(template, run, context)
def retry_run(self, run_id: str, context: OperationContext) -> WorkflowRunResult:
previous = self.repository.get_workflow_run(run_id)
marked = previous.retried()
self.repository.save_workflow_run(marked)
retry = previous.retry(actor_id=context.actor.id, correlation_id=context.correlation_id)
retry = replace(retry, step_runs=tuple(_reset_step_run(item) for item in previous.step_runs))
self.repository.save_actor(context.actor)
self.repository.save_workflow_run(retry)
decision = PolicyDecision.allow(
context.actor.id,
"workflow.run.retry",
f"workflow_run:{run_id}",
context={"previous_run_id": run_id, "retry_run_id": retry.run_id},
)
self._audit(
"workflow.run.retried",
f"workflow_run:{run_id}",
AuditOutcome.SUCCESS,
context,
decision,
details={"retry_run_id": retry.run_id},
)
return self.resume_run(retry.run_id, context)
def cancel_run(self, run_id: str, context: OperationContext, *, reason: str | None = None) -> WorkflowRun:
run = self.repository.get_workflow_run(run_id)
diagnostic = Diagnostic(
severity="warning",
code="workflow.run_canceled",
message="Workflow run was canceled",
details={"reason": reason} if reason else {},
)
canceled_steps = tuple(
step.canceled((_diagnostic_dict(diagnostic),))
if step.status in (WorkflowStepRunStatus.QUEUED, WorkflowStepRunStatus.WAITING)
else step
for step in run.step_runs
)
canceled = replace(run, step_runs=canceled_steps).canceled((_diagnostic_dict(diagnostic),))
self.repository.save_actor(context.actor)
decision = self._authorize(
context,
"workflow.run.cancel",
f"workflow_run:{run_id}",
resource_metadata={"reason": reason, "status": run.status.value},
)
if not decision.allowed:
self._audit(
"workflow.run.canceled",
f"workflow_run:{run_id}",
AuditOutcome.DENIED,
context,
decision,
details={"reason": reason} if reason else {},
)
raise AuthorizationError(
"Operation denied by policy",
details={
"action": "workflow.run.cancel",
"resource": f"workflow_run:{run_id}",
"correlation_id": context.correlation_id,
"policy_decision": decision.to_dict(),
},
)
self.repository.save_workflow_run(canceled)
self._audit(
"workflow.run.canceled",
f"workflow_run:{run_id}",
AuditOutcome.SUCCESS,
context,
decision,
details={"reason": reason} if reason else {},
)
return canceled
def _execute_run(
self,
template: WorkflowTemplate,
run: WorkflowRun,
context: OperationContext,
) -> WorkflowRunResult:
decision = PolicyDecision.from_dict(run.policy_context["run_execute"])
run = run.running()
self.repository.save_workflow_run(run)
self._audit(
"workflow.run.started",
f"workflow_run:{run.run_id}",
AuditOutcome.SUCCESS,
context,
decision,
details={"template_id": template.template_id, "template_version": template.version},
)
step_results: list[TransformationRunResult] = []
attempted_step_ids: set[str] = set()
progress = True
while progress:
progress = False
step_runs_by_id = {item.step_id: item for item in run.step_runs}
for step in template.steps:
current = step_runs_by_id[step.step_id]
if current.status in (
WorkflowStepRunStatus.COMPLETED,
WorkflowStepRunStatus.SKIPPED,
WorkflowStepRunStatus.CANCELED,
):
continue
if current.status == WorkflowStepRunStatus.FAILED and current.step_id in attempted_step_ids:
continue
dependency_diagnostic = _dependency_diagnostic(step, step_runs_by_id)
if dependency_diagnostic is not None:
if _dependency_terminal(step, step_runs_by_id):
current = _step_failure_by_behavior(step, current, dependency_diagnostic)
run = run.with_step_run(current)
self.repository.save_workflow_run(run)
progress = True
continue
current = current.running()
run = run.with_step_run(current)
self.repository.save_workflow_run(run)
self._audit(
"workflow.step.started",
f"workflow_run:{run.run_id}:step:{step.step_id}",
AuditOutcome.SUCCESS,
context,
decision,
details={"operation_id": step.operation_id, "kind": step.kind},
)
attempted_step_ids.add(step.step_id)
executed, step_result = self._execute_step(step, run, context)
if step_result is not None:
step_results.append(step_result)
if executed.status == WorkflowStepRunStatus.FAILED and step.failure_behavior == "continue":
executed = replace(executed, status=WorkflowStepRunStatus.SKIPPED)
run = run.with_step_run(executed)
self.repository.save_workflow_run(run)
self._audit(
"workflow.step.completed" if executed.status == WorkflowStepRunStatus.COMPLETED else "workflow.step.failed",
f"workflow_run:{run.run_id}:step:{step.step_id}",
AuditOutcome.SUCCESS if executed.status == WorkflowStepRunStatus.COMPLETED else AuditOutcome.FAILED,
context,
decision,
details={
"operation_id": step.operation_id,
"status": executed.status.value,
"diagnostics": list(executed.diagnostics),
},
)
progress = True
if executed.status == WorkflowStepRunStatus.FAILED and step.failure_behavior != "continue":
progress = False
break
final = _finalize_run(run)
self.repository.save_workflow_run(final)
event = self._audit(
_workflow_completion_operation(final),
f"workflow_run:{final.run_id}",
_workflow_audit_outcome(final),
context,
decision,
details={
"template_id": template.template_id,
"status": final.status.value,
"output_asset_ids": list(final.output_asset_ids),
"diagnostics": list(final.diagnostics),
},
)
return WorkflowRunResult(
run=final,
success=final.status == WorkflowRunStatus.COMPLETED,
diagnostics=tuple(Diagnostic(**item) for item in final.diagnostics),
audit_event=event,
policy_decision=decision,
step_results=tuple(step_results),
)
def _execute_step(
self,
step: WorkflowStepDefinition,
run: WorkflowRun,
context: OperationContext,
) -> tuple[WorkflowStepRun, TransformationRunResult | None]:
current = {item.step_id: item for item in run.step_runs}[step.step_id]
precondition_diagnostics = _precondition_diagnostics(step, run)
if precondition_diagnostics:
return current.failed(tuple(_diagnostic_dict(item) for item in precondition_diagnostics)), None
if step.kind != "transformation":
diagnostic = Diagnostic(
severity="error",
code="workflow.step_kind_unsupported",
message="Workflow step kind is not executable by the MVP runner",
details={"step_id": step.step_id, "kind": step.kind},
)
return current.failed((_diagnostic_dict(diagnostic),)), None
if not step.operation_id:
diagnostic = Diagnostic(
severity="error",
code="workflow.operation_missing",
message="Transformation workflow step requires an operation ID",
details={"step_id": step.step_id},
)
return current.failed((_diagnostic_dict(diagnostic),)), None
source_asset_ids, resolve_diagnostics = _resolve_source_asset_ids(step, run)
if resolve_diagnostics:
return current.failed(tuple(_diagnostic_dict(item) for item in resolve_diagnostics)), None
result = self.transformation_service.execute_transformation(
TransformationRequest(
operation_id=step.operation_id,
source_asset_ids=source_asset_ids,
parameters=dict(step.parameters),
output_asset_id=self._available_output_asset_id(
_resolve_value(step.outputs.get("asset_id"), run),
run,
),
output_title=_resolve_value(step.outputs.get("title"), run),
output_asset_type=str(step.outputs.get("asset_type", "derived_artifact")),
output_media_type=step.outputs.get("media_type"),
metadata=dict(step.metadata),
),
context,
)
if result.success and result.run is not None:
return (
current.completed(
transformation_run_id=result.run.run_id,
output_asset_ids=result.run.output_asset_ids,
),
result,
)
diagnostics = result.diagnostics or (
Diagnostic(
severity="error",
code="workflow.transformation_failed",
message="Transformation step failed without diagnostics",
details={"step_id": step.step_id, "operation_id": step.operation_id},
),
)
transformation_run_id = result.run.run_id if result.run else None
return (
replace(current, transformation_run_id=transformation_run_id).failed(
tuple(_diagnostic_dict(item) for item in diagnostics)
),
result,
)
def _available_output_asset_id(self, output_asset_id: Any, run: WorkflowRun) -> str | None:
if output_asset_id is None:
return None
output_asset_id = str(output_asset_id)
try:
self.repository.get_asset(output_asset_id)
except NotFoundError:
return output_asset_id
return f"{output_asset_id}-{run.run_id}"
def _authorize(
self,
context: OperationContext,
action: str,
resource: str,
*,
resource_metadata: dict[str, Any] | None = None,
) -> PolicyDecision:
self.repository.save_actor(context.actor)
try:
return self.policy_gateway.authorize(
context,
action,
resource,
resource_metadata=resource_metadata,
)
except Exception as exc:
return PolicyDecision.fail_closed(
context.actor.id,
action,
resource,
reason=str(exc) or "Workflow policy gateway failed",
context={"resource_metadata": resource_metadata or {}, "gateway_error": type(exc).__name__},
)
def _audit(
self,
operation: str,
target: str,
outcome: AuditOutcome,
context: OperationContext,
policy_decision: PolicyDecision,
*,
details: dict[str, Any] | None = None,
) -> AuditEvent:
event = AuditEvent.from_context(
operation,
target,
outcome,
context,
policy_decision=policy_decision,
details=details,
)
return self.repository.save_audit_event(event)
def _initial_step_runs(template: WorkflowTemplate) -> tuple[WorkflowStepRun, ...]:
return tuple(
WorkflowStepRun(step_id=step.step_id, operation_id=step.operation_id)
for step in template.steps
)
def _reset_step_run(step_run: WorkflowStepRun) -> WorkflowStepRun:
return WorkflowStepRun(step_id=step_run.step_id, operation_id=step_run.operation_id)
def _template_diagnostics(template: WorkflowTemplate) -> tuple[Diagnostic, ...]:
diagnostics: list[Diagnostic] = []
step_ids = [step.step_id for step in template.steps]
duplicate_ids = sorted({step_id for step_id in step_ids if step_ids.count(step_id) > 1})
for step_id in duplicate_ids:
diagnostics.append(
Diagnostic(
severity="error",
code="workflow.step_id_duplicate",
message="Workflow template contains duplicate step IDs",
details={"template_id": template.template_id, "step_id": step_id},
)
)
known = set(step_ids)
for step in template.steps:
if step.failure_behavior not in ("fail_workflow", "continue"):
diagnostics.append(
Diagnostic(
severity="error",
code="workflow.failure_behavior_unsupported",
message="Workflow step declares unsupported failure behavior",
details={"step_id": step.step_id, "failure_behavior": step.failure_behavior},
)
)
for dependency in step.depends_on:
if dependency not in known:
diagnostics.append(
Diagnostic(
severity="error",
code="workflow.dependency_missing",
message="Workflow step depends on an unknown step",
details={"step_id": step.step_id, "dependency": dependency},
)
)
cycle = _dependency_cycle(template.steps)
if cycle:
diagnostics.append(
Diagnostic(
severity="error",
code="workflow.dependency_cycle",
message="Workflow template contains a dependency cycle",
details={"cycle": cycle},
)
)
return tuple(diagnostics)
def _dependency_cycle(steps: tuple[WorkflowStepDefinition, ...]) -> list[str]:
graph = {step.step_id: tuple(step.depends_on) for step in steps}
visiting: set[str] = set()
visited: set[str] = set()
stack: list[str] = []
def visit(step_id: str) -> list[str] | None:
if step_id in visited:
return None
if step_id in visiting:
if step_id in stack:
return stack[stack.index(step_id):] + [step_id]
return [step_id]
visiting.add(step_id)
stack.append(step_id)
for dependency in graph.get(step_id, ()):
result = visit(dependency)
if result:
return result
visiting.remove(step_id)
visited.add(step_id)
stack.pop()
return None
for step_id in graph:
result = visit(step_id)
if result:
return result
return []
def _input_diagnostics(template: WorkflowTemplate, invocation: WorkflowInvocation) -> list[Diagnostic]:
diagnostics: list[Diagnostic] = []
specs = {item.name: item for item in template.inputs}
for spec in template.inputs:
if spec.required and spec.name not in invocation.inputs:
diagnostics.append(
Diagnostic(
severity="error",
code="workflow.input_missing",
message="Workflow invocation is missing a required input",
details={"template_id": template.template_id, "input": spec.name, "kind": spec.kind.value},
)
)
for name, binding in invocation.inputs.items():
spec = specs.get(name)
declared_kind = binding.get("kind") if isinstance(binding, dict) else None
if spec is not None and declared_kind is not None and declared_kind != spec.kind.value:
diagnostics.append(
Diagnostic(
severity="error",
code="workflow.input_kind_mismatch",
message="Workflow invocation input kind does not match the template declaration",
details={
"template_id": template.template_id,
"input": name,
"expected": spec.kind.value,
"actual": declared_kind,
},
)
)
return diagnostics
def _dependency_diagnostic(
step: WorkflowStepDefinition,
step_runs_by_id: dict[str, WorkflowStepRun],
) -> Diagnostic | None:
for dependency in step.depends_on:
dependency_run = step_runs_by_id[dependency]
if dependency_run.status != WorkflowStepRunStatus.COMPLETED:
return Diagnostic(
severity="error",
code="workflow.dependency_not_completed",
message="Workflow step dependency is not completed",
details={
"step_id": step.step_id,
"dependency": dependency,
"dependency_status": dependency_run.status.value,
},
)
return None
def _dependency_terminal(
step: WorkflowStepDefinition,
step_runs_by_id: dict[str, WorkflowStepRun],
) -> bool:
return any(
step_runs_by_id[dependency].status
in (
WorkflowStepRunStatus.FAILED,
WorkflowStepRunStatus.SKIPPED,
WorkflowStepRunStatus.CANCELED,
)
for dependency in step.depends_on
)
def _step_failure_by_behavior(
step: WorkflowStepDefinition,
step_run: WorkflowStepRun,
diagnostic: Diagnostic,
) -> WorkflowStepRun:
if step.failure_behavior == "continue":
return step_run.skipped((_diagnostic_dict(diagnostic),))
return step_run.failed((_diagnostic_dict(diagnostic),))
def _precondition_diagnostics(step: WorkflowStepDefinition, run: WorkflowRun) -> list[Diagnostic]:
diagnostics: list[Diagnostic] = []
for precondition in step.preconditions:
precondition_type = precondition.get("type")
if precondition_type == "input_present":
name = precondition.get("name")
if name not in run.input_bindings:
diagnostics.append(
Diagnostic(
severity="error",
code="workflow.precondition_failed",
message="Workflow step precondition failed",
details={"step_id": step.step_id, "type": precondition_type, "name": name},
)
)
elif precondition_type:
diagnostics.append(
Diagnostic(
severity="error",
code="workflow.precondition_unsupported",
message="Workflow step precondition type is unsupported",
details={"step_id": step.step_id, "type": precondition_type},
)
)
return diagnostics
def _resolve_source_asset_ids(
step: WorkflowStepDefinition,
run: WorkflowRun,
) -> tuple[tuple[str, ...], list[Diagnostic]]:
source_asset_ids = _resolve_value(step.inputs.get("source_asset_ids", ()), run)
try:
return _normalise_asset_ids(source_asset_ids), []
except ValueError as exc:
return (), [
Diagnostic(
severity="error",
code="workflow.source_asset_resolution_failed",
message="Workflow step source assets could not be resolved",
details={"step_id": step.step_id, "error": str(exc), "binding": source_asset_ids},
)
]
def _resolve_value(value: Any, run: WorkflowRun) -> Any:
if isinstance(value, str):
if value.startswith("$inputs."):
return run.input_bindings.get(value.removeprefix("$inputs."))
if value.startswith("$steps."):
parts = value.split(".")
if len(parts) == 3:
step_id = parts[1]
attr = parts[2]
step_run = {item.step_id: item for item in run.step_runs}.get(step_id)
if step_run is None:
return None
if attr == "output_asset_ids":
return step_run.output_asset_ids
if attr == "output_asset_id":
return step_run.output_asset_ids[0] if step_run.output_asset_ids else None
if attr == "transformation_run_id":
return step_run.transformation_run_id
return None
return value
if isinstance(value, list):
return [_resolve_value(item, run) for item in value]
if isinstance(value, tuple):
return tuple(_resolve_value(item, run) for item in value)
if isinstance(value, dict):
return {key: _resolve_value(item, run) for key, item in value.items()}
return value
def _normalise_asset_ids(value: Any) -> tuple[str, ...]:
if value is None:
return ()
if isinstance(value, str):
return (value,)
if isinstance(value, dict):
if "asset_id" in value:
return _normalise_asset_ids(value["asset_id"])
if "asset_ids" in value:
return _normalise_asset_ids(value["asset_ids"])
raise ValueError("asset binding dictionary must include asset_id or asset_ids")
if isinstance(value, (list, tuple)):
asset_ids: list[str] = []
for item in value:
asset_ids.extend(_normalise_asset_ids(item))
return tuple(asset_ids)
raise ValueError(f"unsupported asset binding type: {type(value).__name__}")
def _finalize_run(run: WorkflowRun) -> WorkflowRun:
output_asset_ids = tuple(
output_asset_id
for step_run in run.step_runs
if step_run.status == WorkflowStepRunStatus.COMPLETED
for output_asset_id in step_run.output_asset_ids
)
diagnostics = tuple(
diagnostic
for step_run in run.step_runs
if step_run.status
in (
WorkflowStepRunStatus.FAILED,
WorkflowStepRunStatus.SKIPPED,
WorkflowStepRunStatus.CANCELED,
WorkflowStepRunStatus.WAITING,
)
for diagnostic in step_run.diagnostics
)
statuses = {step_run.status for step_run in run.step_runs}
if not run.step_runs or statuses == {WorkflowStepRunStatus.COMPLETED}:
return run.completed(output_asset_ids=output_asset_ids)
if WorkflowStepRunStatus.FAILED in statuses or WorkflowStepRunStatus.CANCELED in statuses:
if output_asset_ids:
return run.partially_completed(output_asset_ids=output_asset_ids, diagnostics=diagnostics)
return run.failed(diagnostics)
if WorkflowStepRunStatus.SKIPPED in statuses:
if output_asset_ids:
return run.partially_completed(output_asset_ids=output_asset_ids, diagnostics=diagnostics)
return run.failed(diagnostics)
return run.waiting(diagnostics)
def _workflow_completion_operation(run: WorkflowRun) -> str:
if run.status == WorkflowRunStatus.COMPLETED:
return "workflow.run.completed"
if run.status == WorkflowRunStatus.PARTIALLY_COMPLETED:
return "workflow.run.partially_completed"
if run.status == WorkflowRunStatus.WAITING:
return "workflow.run.waiting"
return "workflow.run.failed"
def _workflow_audit_outcome(run: WorkflowRun) -> AuditOutcome:
if run.status == WorkflowRunStatus.COMPLETED:
return AuditOutcome.SUCCESS
if run.status == WorkflowRunStatus.PARTIALLY_COMPLETED:
return AuditOutcome.PARTIAL
if run.status == WorkflowRunStatus.WAITING:
return AuditOutcome.REVIEW_REQUIRED
return AuditOutcome.FAILED
def _permission_diagnostic(decision: PolicyDecision) -> Diagnostic:
return Diagnostic(
severity="error",
code="workflow.permission_denied",
message="Workflow operation denied by policy",
details={"policy_decision": decision.to_dict()},
)
def _diagnostic_dict(diagnostic: Diagnostic) -> dict[str, Any]:
return diagnostic.to_dict()