generated from coulomb/repo-seed
Implemented durable workflow/job foundation
This commit is contained in:
@@ -32,6 +32,7 @@ from .transformation_service import (
|
||||
TransformationService,
|
||||
default_transformation_registry,
|
||||
)
|
||||
from .workflow_service import WorkflowInvocation, WorkflowRunResult, WorkflowService
|
||||
|
||||
__all__ = [
|
||||
"AssetChangeResult",
|
||||
@@ -60,5 +61,8 @@ __all__ = [
|
||||
"TransformationRequest",
|
||||
"TransformationRunResult",
|
||||
"TransformationService",
|
||||
"WorkflowInvocation",
|
||||
"WorkflowRunResult",
|
||||
"WorkflowService",
|
||||
"default_transformation_registry",
|
||||
]
|
||||
|
||||
853
src/kontextual_engine/services/workflow_service.py
Normal file
853
src/kontextual_engine/services/workflow_service.py
Normal file
@@ -0,0 +1,853 @@
|
||||
"""Durable workflow templates and MVP job runner."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field, replace
|
||||
from typing import Any
|
||||
|
||||
from kontextual_engine.core import (
|
||||
AuditEvent,
|
||||
AuditOutcome,
|
||||
OperationContext,
|
||||
PolicyDecision,
|
||||
WorkflowRun,
|
||||
WorkflowRunStatus,
|
||||
WorkflowStepDefinition,
|
||||
WorkflowStepRun,
|
||||
WorkflowStepRunStatus,
|
||||
WorkflowTemplate,
|
||||
)
|
||||
from kontextual_engine.errors import AuthorizationError, Diagnostic, NotFoundError, ValidationError
|
||||
from kontextual_engine.ports import AllowAllPolicyGateway, AssetRegistryRepository, PolicyGateway
|
||||
|
||||
from .transformation_service import TransformationRequest, TransformationRunResult, TransformationService
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class WorkflowInvocation:
    """Request to run a workflow template with a set of input bindings."""

    # Identifier of the template to run.
    template_id: str
    # Input bindings keyed by input name.
    inputs: dict[str, Any] = field(default_factory=dict)
    # Template version to look up; ``None`` is passed through to the repository as-is.
    template_version: str | None = None
    # Free-form caller metadata carried alongside the invocation.
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view with shallow copies of ``inputs`` and ``metadata``."""
        payload: dict[str, Any] = {
            "template_id": self.template_id,
            "template_version": self.template_version,
        }
        # Copy the mappings so callers cannot mutate the invocation through the result.
        payload["inputs"] = dict(self.inputs)
        payload["metadata"] = dict(self.metadata)
        return payload
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class WorkflowRunResult:
    """Outcome of a workflow service operation on a run."""

    # The run in the state it had when the result was produced.
    run: WorkflowRun
    # Whether the operation succeeded.
    success: bool
    # Diagnostics explaining failures or partial outcomes.
    diagnostics: tuple[Diagnostic, ...] = ()
    # Audit event recorded for the operation, when one was written.
    audit_event: AuditEvent | None = None
    # Policy decision the operation was evaluated under, when available.
    policy_decision: PolicyDecision | None = None
    # Results of the transformation steps executed during the operation.
    step_results: tuple[TransformationRunResult, ...] = ()

    def __post_init__(self) -> None:
        """Coerce the sequence fields to tuples so any iterable may be passed in."""
        # The dataclass is frozen, so normal attribute assignment is blocked.
        for attribute in ("diagnostics", "step_results"):
            object.__setattr__(self, attribute, tuple(getattr(self, attribute)))

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view; missing optional members serialise as ``None``."""
        audit_event = self.audit_event.to_dict() if self.audit_event else None
        policy_decision = self.policy_decision.to_dict() if self.policy_decision else None
        return {
            "success": self.success,
            "run": self.run.to_dict(),
            "diagnostics": [diagnostic.to_dict() for diagnostic in self.diagnostics],
            "audit_event": audit_event,
            "policy_decision": policy_decision,
            "step_results": [step_result.to_dict() for step_result in self.step_results],
        }
|
||||
|
||||
|
||||
class WorkflowService:
    """Application service for durable workflow templates and their runs.

    Templates are validated, authorized and stored through the repository.
    Runs are saved after every state change so they can be resumed, retried
    or canceled later, and each operation records an audit event.
    """

    def __init__(
        self,
        repository: AssetRegistryRepository,
        *,
        transformation_service: TransformationService | None = None,
        policy_gateway: PolicyGateway | None = None,
    ) -> None:
        """Wire the service to its collaborators.

        Args:
            repository: Store for actors, templates, runs, assets and audit events.
            transformation_service: Executes transformation steps. When omitted,
                one is created on the same repository and policy gateway.
            policy_gateway: Authorizes operations. Defaults to
                ``AllowAllPolicyGateway()``.
        """
        self.repository = repository
        self.policy_gateway = policy_gateway or AllowAllPolicyGateway()
        self.transformation_service = transformation_service or TransformationService(
            repository,
            policy_gateway=self.policy_gateway,
        )

    def register_template(self, template: WorkflowTemplate, context: OperationContext) -> WorkflowTemplate:
        """Validate, authorize and store a workflow template.

        Args:
            template: Template to register.
            context: Operation context carrying the acting actor.

        Returns:
            The template as returned by the repository.

        Raises:
            ValidationError: The template has structural problems; the
                diagnostics are attached under ``details["diagnostics"]``.
            AuthorizationError: Policy denied the registration (a DENIED audit
                event is recorded first).
        """
        self.repository.save_actor(context.actor)
        diagnostics = _template_diagnostics(template)
        if diagnostics:
            raise ValidationError(
                "Workflow template is invalid",
                details={"diagnostics": [item.to_dict() for item in diagnostics]},
            )
        template = template.with_actor(context.actor.id)
        decision = self._authorize(
            context,
            "workflow.template.register",
            f"workflow_template:{template.template_id}",
            resource_metadata={"template": template.to_dict()},
        )
        if not decision.allowed:
            self._audit(
                "workflow.template.register",
                f"workflow_template:{template.template_id}",
                AuditOutcome.DENIED,
                context,
                decision,
            )
            raise AuthorizationError(
                "Operation denied by policy",
                details={
                    "action": "workflow.template.register",
                    "resource": f"workflow_template:{template.template_id}",
                    "correlation_id": context.correlation_id,
                    "policy_decision": decision.to_dict(),
                },
            )
        saved = self.repository.save_workflow_template(template)
        self._audit(
            "workflow.template.register",
            f"workflow_template:{template.template_id}",
            AuditOutcome.SUCCESS,
            context,
            decision,
            details={"template_version": template.version, "template_hash": template.template_hash},
        )
        return saved

    def get_template(self, template_id: str, *, version: str | None = None) -> WorkflowTemplate:
        """Fetch a template from the repository, optionally pinned to ``version``."""
        return self.repository.get_workflow_template(template_id, version=version)

    def list_templates(self, *, template_id: str | None = None) -> tuple[WorkflowTemplate, ...]:
        """Return the repository's templates as a tuple, optionally filtered by ``template_id``."""
        return tuple(self.repository.list_workflow_templates(template_id=template_id))

    def queue_template(self, invocation: WorkflowInvocation, context: OperationContext) -> WorkflowRunResult:
        """Create and persist a run for a template without executing its steps.

        The run is saved and a ``workflow.run.queued`` audit event is written
        before the policy decision and the inputs are checked, so denied or
        invalid invocations still leave a persisted (failed) run behind.

        Args:
            invocation: Template reference and input bindings.
            context: Operation context carrying the acting actor.

        Returns:
            A successful result holding the queued run, or a failed result
            with diagnostics when policy denies the run or inputs are invalid.
        """
        template = self.repository.get_workflow_template(
            invocation.template_id,
            version=invocation.template_version,
        )
        self.repository.save_actor(context.actor)
        decision = self._authorize(
            context,
            "workflow.run.execute",
            f"workflow_template:{template.template_id}",
            resource_metadata={"template": template.to_dict(), "invocation": invocation.to_dict()},
        )
        run = WorkflowRun(
            template_id=template.template_id,
            template_version=template.version,
            input_bindings=dict(invocation.inputs),
            actor_id=context.actor.id,
            correlation_id=context.correlation_id,
            # Stored on the run so later executions can restore the decision.
            policy_context={"run_execute": decision.to_dict()},
            step_runs=_initial_step_runs(template),
        )
        self.repository.save_workflow_run(run)
        self._audit(
            "workflow.run.queued",
            f"workflow_run:{run.run_id}",
            AuditOutcome.SUCCESS,
            context,
            decision,
            details={"template_id": template.template_id, "template_version": template.version},
        )
        if not decision.allowed:
            diagnostic = _permission_diagnostic(decision)
            failed = run.failed((_diagnostic_dict(diagnostic),))
            self.repository.save_workflow_run(failed)
            event = self._audit(
                "workflow.run.execute",
                f"workflow_run:{run.run_id}",
                AuditOutcome.DENIED,
                context,
                decision,
                details={"template_id": template.template_id},
            )
            return WorkflowRunResult(
                run=failed,
                success=False,
                diagnostics=(diagnostic,),
                audit_event=event,
                policy_decision=decision,
            )

        input_diagnostics = _input_diagnostics(template, invocation)
        if input_diagnostics:
            failed = run.failed(tuple(_diagnostic_dict(item) for item in input_diagnostics))
            self.repository.save_workflow_run(failed)
            event = self._audit(
                "workflow.run.failed",
                f"workflow_run:{run.run_id}",
                AuditOutcome.FAILED,
                context,
                decision,
                details={"diagnostics": [item.to_dict() for item in input_diagnostics]},
            )
            return WorkflowRunResult(
                run=failed,
                success=False,
                diagnostics=tuple(input_diagnostics),
                audit_event=event,
                policy_decision=decision,
            )

        return WorkflowRunResult(run=run, success=True, policy_decision=decision)

    def invoke_template(self, invocation: WorkflowInvocation, context: OperationContext) -> WorkflowRunResult:
        """Queue a run and, if queuing succeeded, execute it immediately."""
        queued = self.queue_template(invocation, context)
        if not queued.success:
            return queued
        return self.resume_run(queued.run.run_id, context)

    def resume_run(self, run_id: str, context: OperationContext) -> WorkflowRunResult:
        """Execute the remaining steps of a persisted run.

        Runs whose status is COMPLETED, CANCELED or RETRIED are not executed;
        a failed result with a ``workflow.run_not_resumable`` diagnostic is
        returned instead.
        """
        run = self.repository.get_workflow_run(run_id)
        if run.status in (
            WorkflowRunStatus.COMPLETED,
            WorkflowRunStatus.CANCELED,
            WorkflowRunStatus.RETRIED,
        ):
            diagnostic = Diagnostic(
                severity="error",
                code="workflow.run_not_resumable",
                message="Workflow run cannot be resumed from its current status",
                details={"run_id": run_id, "status": run.status.value},
            )
            return WorkflowRunResult(run=run, success=False, diagnostics=(diagnostic,))
        template = self.repository.get_workflow_template(run.template_id, version=run.template_version)
        return self._execute_run(template, run, context)

    def retry_run(self, run_id: str, context: OperationContext) -> WorkflowRunResult:
        """Mark a run as retried, create a fresh retry run and execute it.

        The retry run starts with every step reset to a new step run.

        Returns:
            The result of executing the newly created retry run.
        """
        previous = self.repository.get_workflow_run(run_id)
        marked = previous.retried()
        self.repository.save_workflow_run(marked)
        retry = previous.retry(actor_id=context.actor.id, correlation_id=context.correlation_id)
        retry = replace(retry, step_runs=tuple(_reset_step_run(item) for item in previous.step_runs))
        self.repository.save_actor(context.actor)
        self.repository.save_workflow_run(retry)
        # NOTE(review): unlike register/queue/cancel, this does not consult the
        # policy gateway; the allow decision is constructed locally. Confirm
        # whether "workflow.run.retry" should go through self._authorize.
        decision = PolicyDecision.allow(
            context.actor.id,
            "workflow.run.retry",
            f"workflow_run:{run_id}",
            context={"previous_run_id": run_id, "retry_run_id": retry.run_id},
        )
        self._audit(
            "workflow.run.retried",
            f"workflow_run:{run_id}",
            AuditOutcome.SUCCESS,
            context,
            decision,
            details={"retry_run_id": retry.run_id},
        )
        return self.resume_run(retry.run_id, context)

    def cancel_run(self, run_id: str, context: OperationContext, *, reason: str | None = None) -> WorkflowRun:
        """Cancel a run and its not-yet-started steps.

        Steps whose status is QUEUED or WAITING are canceled; all other steps
        keep their current state.

        Args:
            run_id: Identifier of the run to cancel.
            context: Operation context carrying the acting actor.
            reason: Optional human-readable reason, recorded in the diagnostic
                and audit details when given.

        Returns:
            The canceled run that was saved.

        Raises:
            AuthorizationError: Policy denied the cancellation (a DENIED audit
                event is recorded first and the run is left unchanged).
        """
        run = self.repository.get_workflow_run(run_id)
        diagnostic = Diagnostic(
            severity="warning",
            code="workflow.run_canceled",
            message="Workflow run was canceled",
            details={"reason": reason} if reason else {},
        )
        canceled_steps = tuple(
            step.canceled((_diagnostic_dict(diagnostic),))
            if step.status in (WorkflowStepRunStatus.QUEUED, WorkflowStepRunStatus.WAITING)
            else step
            for step in run.step_runs
        )
        # Built up front but only persisted once policy has allowed the cancel.
        canceled = replace(run, step_runs=canceled_steps).canceled((_diagnostic_dict(diagnostic),))
        self.repository.save_actor(context.actor)
        decision = self._authorize(
            context,
            "workflow.run.cancel",
            f"workflow_run:{run_id}",
            resource_metadata={"reason": reason, "status": run.status.value},
        )
        if not decision.allowed:
            self._audit(
                "workflow.run.canceled",
                f"workflow_run:{run_id}",
                AuditOutcome.DENIED,
                context,
                decision,
                details={"reason": reason} if reason else {},
            )
            raise AuthorizationError(
                "Operation denied by policy",
                details={
                    "action": "workflow.run.cancel",
                    "resource": f"workflow_run:{run_id}",
                    "correlation_id": context.correlation_id,
                    "policy_decision": decision.to_dict(),
                },
            )
        self.repository.save_workflow_run(canceled)
        self._audit(
            "workflow.run.canceled",
            f"workflow_run:{run_id}",
            AuditOutcome.SUCCESS,
            context,
            decision,
            details={"reason": reason} if reason else {},
        )
        return canceled

    def _execute_run(
        self,
        template: WorkflowTemplate,
        run: WorkflowRun,
        context: OperationContext,
    ) -> WorkflowRunResult:
        """Run every executable step of ``run`` and persist the final state.

        Steps are scanned in template order, repeatedly, until a full pass
        makes no progress. The step-run lookup is rebuilt once per pass, so a
        step whose dependency completes during a pass is picked up on the next
        one. A failing step with any failure behavior other than
        ``"continue"`` stops the scan immediately.

        Args:
            template: Template the run was created from.
            run: Run to execute; its ``policy_context["run_execute"]`` entry is
                restored as the policy decision attached to audit events.
            context: Operation context carrying the acting actor.

        Returns:
            Result wrapping the finalized run, its diagnostics and the
            transformation results of the steps executed in this call.
        """
        decision = PolicyDecision.from_dict(run.policy_context["run_execute"])
        run = run.running()
        self.repository.save_workflow_run(run)
        self._audit(
            "workflow.run.started",
            f"workflow_run:{run.run_id}",
            AuditOutcome.SUCCESS,
            context,
            decision,
            details={"template_id": template.template_id, "template_version": template.version},
        )

        step_results: list[TransformationRunResult] = []
        # Steps executed in this call; a failed one is not executed twice.
        attempted_step_ids: set[str] = set()
        progress = True
        while progress:
            progress = False
            step_runs_by_id = {item.step_id: item for item in run.step_runs}
            for step in template.steps:
                current = step_runs_by_id[step.step_id]
                if current.status in (
                    WorkflowStepRunStatus.COMPLETED,
                    WorkflowStepRunStatus.SKIPPED,
                    WorkflowStepRunStatus.CANCELED,
                ):
                    continue
                if current.status == WorkflowStepRunStatus.FAILED and current.step_id in attempted_step_ids:
                    continue
                dependency_diagnostic = _dependency_diagnostic(step, step_runs_by_id)
                if dependency_diagnostic is not None:
                    # Apply the dependency failure only once. A step that is
                    # already FAILED would otherwise be re-failed on every pass
                    # and keep ``progress`` True forever (e.g. when a
                    # dependency ended SKIPPED and this step fails the workflow).
                    if current.status != WorkflowStepRunStatus.FAILED and _dependency_terminal(
                        step, step_runs_by_id
                    ):
                        current = _step_failure_by_behavior(step, current, dependency_diagnostic)
                        run = run.with_step_run(current)
                        self.repository.save_workflow_run(run)
                        progress = True
                    continue

                current = current.running()
                run = run.with_step_run(current)
                self.repository.save_workflow_run(run)
                self._audit(
                    "workflow.step.started",
                    f"workflow_run:{run.run_id}:step:{step.step_id}",
                    AuditOutcome.SUCCESS,
                    context,
                    decision,
                    details={"operation_id": step.operation_id, "kind": step.kind},
                )
                attempted_step_ids.add(step.step_id)
                executed, step_result = self._execute_step(step, run, context)
                if step_result is not None:
                    step_results.append(step_result)
                if executed.status == WorkflowStepRunStatus.FAILED and step.failure_behavior == "continue":
                    # A tolerated failure is recorded as skipped so the run can go on.
                    executed = replace(executed, status=WorkflowStepRunStatus.SKIPPED)
                run = run.with_step_run(executed)
                self.repository.save_workflow_run(run)
                step_completed = executed.status == WorkflowStepRunStatus.COMPLETED
                self._audit(
                    "workflow.step.completed" if step_completed else "workflow.step.failed",
                    f"workflow_run:{run.run_id}:step:{step.step_id}",
                    AuditOutcome.SUCCESS if step_completed else AuditOutcome.FAILED,
                    context,
                    decision,
                    details={
                        "operation_id": step.operation_id,
                        "status": executed.status.value,
                        "diagnostics": list(executed.diagnostics),
                    },
                )
                progress = True
                if executed.status == WorkflowStepRunStatus.FAILED and step.failure_behavior != "continue":
                    progress = False
                    break

        final = _finalize_run(run)
        self.repository.save_workflow_run(final)
        event = self._audit(
            _workflow_completion_operation(final),
            f"workflow_run:{final.run_id}",
            _workflow_audit_outcome(final),
            context,
            decision,
            details={
                "template_id": template.template_id,
                "status": final.status.value,
                "output_asset_ids": list(final.output_asset_ids),
                "diagnostics": list(final.diagnostics),
            },
        )
        return WorkflowRunResult(
            run=final,
            success=final.status == WorkflowRunStatus.COMPLETED,
            # Run diagnostics are stored as dicts; rebuild Diagnostic objects for callers.
            diagnostics=tuple(Diagnostic(**item) for item in final.diagnostics),
            audit_event=event,
            policy_decision=decision,
            step_results=tuple(step_results),
        )

    def _execute_step(
        self,
        step: WorkflowStepDefinition,
        run: WorkflowRun,
        context: OperationContext,
    ) -> tuple[WorkflowStepRun, TransformationRunResult | None]:
        """Execute one step and return its updated step run.

        Only steps of kind ``"transformation"`` are executed. Precondition
        failures, unsupported kinds, a missing operation ID and unresolvable
        source assets all yield a failed step run without calling the
        transformation service.

        Returns:
            ``(step_run, transformation_result)``; the second element is
            ``None`` when the transformation service was not called.
        """
        current = {item.step_id: item for item in run.step_runs}[step.step_id]
        precondition_diagnostics = _precondition_diagnostics(step, run)
        if precondition_diagnostics:
            return current.failed(tuple(_diagnostic_dict(item) for item in precondition_diagnostics)), None
        if step.kind != "transformation":
            diagnostic = Diagnostic(
                severity="error",
                code="workflow.step_kind_unsupported",
                message="Workflow step kind is not executable by the MVP runner",
                details={"step_id": step.step_id, "kind": step.kind},
            )
            return current.failed((_diagnostic_dict(diagnostic),)), None
        if not step.operation_id:
            diagnostic = Diagnostic(
                severity="error",
                code="workflow.operation_missing",
                message="Transformation workflow step requires an operation ID",
                details={"step_id": step.step_id},
            )
            return current.failed((_diagnostic_dict(diagnostic),)), None

        source_asset_ids, resolve_diagnostics = _resolve_source_asset_ids(step, run)
        if resolve_diagnostics:
            return current.failed(tuple(_diagnostic_dict(item) for item in resolve_diagnostics)), None

        result = self.transformation_service.execute_transformation(
            TransformationRequest(
                operation_id=step.operation_id,
                source_asset_ids=source_asset_ids,
                parameters=dict(step.parameters),
                output_asset_id=self._available_output_asset_id(
                    _resolve_value(step.outputs.get("asset_id"), run),
                    run,
                ),
                output_title=_resolve_value(step.outputs.get("title"), run),
                output_asset_type=str(step.outputs.get("asset_type", "derived_artifact")),
                output_media_type=step.outputs.get("media_type"),
                metadata=dict(step.metadata),
            ),
            context,
        )
        if result.success and result.run is not None:
            return (
                current.completed(
                    transformation_run_id=result.run.run_id,
                    output_asset_ids=result.run.output_asset_ids,
                ),
                result,
            )
        # Make sure a failed step always carries at least one diagnostic.
        diagnostics = result.diagnostics or (
            Diagnostic(
                severity="error",
                code="workflow.transformation_failed",
                message="Transformation step failed without diagnostics",
                details={"step_id": step.step_id, "operation_id": step.operation_id},
            ),
        )
        transformation_run_id = result.run.run_id if result.run else None
        return (
            replace(current, transformation_run_id=transformation_run_id).failed(
                tuple(_diagnostic_dict(item) for item in diagnostics)
            ),
            result,
        )

    def _available_output_asset_id(self, output_asset_id: Any, run: WorkflowRun) -> str | None:
        """Return an output asset ID that does not collide with a stored asset.

        ``None`` is passed through. Otherwise the ID is converted to ``str``;
        if the repository already has an asset under that ID, the run ID is
        appended as a suffix.
        """
        if output_asset_id is None:
            return None
        output_asset_id = str(output_asset_id)
        try:
            self.repository.get_asset(output_asset_id)
        except NotFoundError:
            # Nothing stored under this ID yet, so it can be used as-is.
            return output_asset_id
        return f"{output_asset_id}-{run.run_id}"

    def _authorize(
        self,
        context: OperationContext,
        action: str,
        resource: str,
        *,
        resource_metadata: dict[str, Any] | None = None,
    ) -> PolicyDecision:
        """Ask the policy gateway for a decision, failing closed on gateway errors.

        The actor is saved first. Any exception raised by the gateway is
        converted into ``PolicyDecision.fail_closed(...)`` carrying the error
        text and exception type name instead of propagating.
        """
        self.repository.save_actor(context.actor)
        try:
            return self.policy_gateway.authorize(
                context,
                action,
                resource,
                resource_metadata=resource_metadata,
            )
        except Exception as exc:
            return PolicyDecision.fail_closed(
                context.actor.id,
                action,
                resource,
                reason=str(exc) or "Workflow policy gateway failed",
                context={"resource_metadata": resource_metadata or {}, "gateway_error": type(exc).__name__},
            )

    def _audit(
        self,
        operation: str,
        target: str,
        outcome: AuditOutcome,
        context: OperationContext,
        policy_decision: PolicyDecision,
        *,
        details: dict[str, Any] | None = None,
    ) -> AuditEvent:
        """Build an audit event from the context and save it, returning the repository's result."""
        event = AuditEvent.from_context(
            operation,
            target,
            outcome,
            context,
            policy_decision=policy_decision,
            details=details,
        )
        return self.repository.save_audit_event(event)
|
||||
|
||||
|
||||
def _initial_step_runs(template: WorkflowTemplate) -> tuple[WorkflowStepRun, ...]:
    """Build one fresh step run per template step, in template order."""
    fresh_runs = [
        WorkflowStepRun(step_id=definition.step_id, operation_id=definition.operation_id)
        for definition in template.steps
    ]
    return tuple(fresh_runs)
|
||||
|
||||
|
||||
def _reset_step_run(step_run: WorkflowStepRun) -> WorkflowStepRun:
    """Return a new step run that keeps only the step and operation identifiers."""
    identity = {"step_id": step_run.step_id, "operation_id": step_run.operation_id}
    return WorkflowStepRun(**identity)
|
||||
|
||||
|
||||
def _template_diagnostics(template: WorkflowTemplate) -> tuple[Diagnostic, ...]:
    """Collect structural problems of a template.

    Reported in this order: duplicate step IDs (sorted), then per step any
    unsupported failure behavior followed by unknown dependencies, and last
    a dependency cycle if one exists. An empty tuple means no problems.
    """
    findings: list[Diagnostic] = []

    # Single pass: the first sighting goes to ``known_ids``, repeats to ``repeated_ids``.
    known_ids: set[str] = set()
    repeated_ids: set[str] = set()
    for definition in template.steps:
        bucket = repeated_ids if definition.step_id in known_ids else known_ids
        bucket.add(definition.step_id)

    findings.extend(
        Diagnostic(
            severity="error",
            code="workflow.step_id_duplicate",
            message="Workflow template contains duplicate step IDs",
            details={"template_id": template.template_id, "step_id": repeated_id},
        )
        for repeated_id in sorted(repeated_ids)
    )

    for definition in template.steps:
        if definition.failure_behavior not in ("fail_workflow", "continue"):
            findings.append(
                Diagnostic(
                    severity="error",
                    code="workflow.failure_behavior_unsupported",
                    message="Workflow step declares unsupported failure behavior",
                    details={"step_id": definition.step_id, "failure_behavior": definition.failure_behavior},
                )
            )
        findings.extend(
            Diagnostic(
                severity="error",
                code="workflow.dependency_missing",
                message="Workflow step depends on an unknown step",
                details={"step_id": definition.step_id, "dependency": dependency},
            )
            for dependency in definition.depends_on
            if dependency not in known_ids
        )

    loop = _dependency_cycle(template.steps)
    if loop:
        findings.append(
            Diagnostic(
                severity="error",
                code="workflow.dependency_cycle",
                message="Workflow template contains a dependency cycle",
                details={"cycle": loop},
            )
        )
    return tuple(findings)
|
||||
|
||||
|
||||
def _dependency_cycle(steps: tuple[WorkflowStepDefinition, ...]) -> list[str]:
    """Find a dependency cycle among ``steps`` using depth-first search.

    Returns:
        The step IDs forming the first cycle found, starting and ending with
        the same ID (e.g. ``["a", "b", "a"]``), or ``[]`` when there is none.
        Dependencies on IDs that are not steps are treated as leaves.
    """
    edges = {step.step_id: tuple(step.depends_on) for step in steps}
    finished: set[str] = set()
    on_path: set[str] = set()
    path: list[str] = []  # current DFS trail, kept in step with ``on_path``

    def explore(node: str) -> list[str] | None:
        if node in finished:
            return None
        if node in on_path:
            # Back edge: the cycle is the trail from the first visit of ``node``.
            return path[path.index(node):] + [node]
        on_path.add(node)
        path.append(node)
        for upstream in edges.get(node, ()):
            found = explore(upstream)
            if found:
                return found
        path.pop()
        on_path.discard(node)
        finished.add(node)
        return None

    # ``map`` is lazy, so exploration stops at the first cycle found.
    return next((cycle for cycle in map(explore, edges) if cycle), [])
|
||||
|
||||
|
||||
def _input_diagnostics(template: WorkflowTemplate, invocation: WorkflowInvocation) -> list[Diagnostic]:
    """Check invocation inputs against the template's input declarations.

    Reports required inputs that are absent first, then supplied dict
    bindings whose ``"kind"`` differs from the declared kind. Inputs the
    template does not declare, and bindings without a ``"kind"``, are
    not reported.
    """
    supplied = invocation.inputs
    problems: list[Diagnostic] = [
        Diagnostic(
            severity="error",
            code="workflow.input_missing",
            message="Workflow invocation is missing a required input",
            details={"template_id": template.template_id, "input": declared.name, "kind": declared.kind.value},
        )
        for declared in template.inputs
        if declared.required and declared.name not in supplied
    ]

    declared_by_name = {declared.name: declared for declared in template.inputs}
    for name, binding in supplied.items():
        declared = declared_by_name.get(name)
        if declared is None or not isinstance(binding, dict):
            continue
        actual_kind = binding.get("kind")
        if actual_kind is not None and actual_kind != declared.kind.value:
            problems.append(
                Diagnostic(
                    severity="error",
                    code="workflow.input_kind_mismatch",
                    message="Workflow invocation input kind does not match the template declaration",
                    details={
                        "template_id": template.template_id,
                        "input": name,
                        "expected": declared.kind.value,
                        "actual": actual_kind,
                    },
                )
            )
    return problems
|
||||
|
||||
|
||||
def _dependency_diagnostic(
    step: WorkflowStepDefinition,
    step_runs_by_id: dict[str, WorkflowStepRun],
) -> Diagnostic | None:
    """Return a diagnostic for the first dependency of ``step`` that is not completed.

    Dependencies are checked in declaration order; ``None`` means all of
    them are completed.
    """
    for dependency in step.depends_on:
        upstream = step_runs_by_id[dependency]
        if upstream.status == WorkflowStepRunStatus.COMPLETED:
            continue
        blocker_details = {
            "step_id": step.step_id,
            "dependency": dependency,
            "dependency_status": upstream.status.value,
        }
        return Diagnostic(
            severity="error",
            code="workflow.dependency_not_completed",
            message="Workflow step dependency is not completed",
            details=blocker_details,
        )
    return None
|
||||
|
||||
|
||||
def _dependency_terminal(
    step: WorkflowStepDefinition,
    step_runs_by_id: dict[str, WorkflowStepRun],
) -> bool:
    """Tell whether any dependency of ``step`` is failed, skipped or canceled."""
    for dependency in step.depends_on:
        upstream_status = step_runs_by_id[dependency].status
        if upstream_status in (
            WorkflowStepRunStatus.FAILED,
            WorkflowStepRunStatus.SKIPPED,
            WorkflowStepRunStatus.CANCELED,
        ):
            return True
    return False
|
||||
|
||||
|
||||
def _step_failure_by_behavior(
    step: WorkflowStepDefinition,
    step_run: WorkflowStepRun,
    diagnostic: Diagnostic,
) -> WorkflowStepRun:
    """Mark ``step_run`` as skipped or failed according to the step's failure behavior.

    A step whose ``failure_behavior`` is ``"continue"`` is skipped; any
    other value fails the step. The diagnostic is attached either way.
    """
    tolerate_failure = step.failure_behavior == "continue"
    transition = step_run.skipped if tolerate_failure else step_run.failed
    return transition((_diagnostic_dict(diagnostic),))
|
||||
|
||||
|
||||
def _precondition_diagnostics(step: WorkflowStepDefinition, run: WorkflowRun) -> list[Diagnostic]:
    """Evaluate the preconditions of ``step`` against ``run``.

    Only the ``"input_present"`` type is evaluated: it fails when the named
    input is not among the run's input bindings. Any other non-empty type
    is reported as unsupported; entries without a type are ignored.
    """
    problems: list[Diagnostic] = []
    for condition in step.preconditions:
        kind = condition.get("type")
        if kind == "input_present":
            input_name = condition.get("name")
            if input_name in run.input_bindings:
                continue
            problems.append(
                Diagnostic(
                    severity="error",
                    code="workflow.precondition_failed",
                    message="Workflow step precondition failed",
                    details={"step_id": step.step_id, "type": kind, "name": input_name},
                )
            )
        elif kind:
            problems.append(
                Diagnostic(
                    severity="error",
                    code="workflow.precondition_unsupported",
                    message="Workflow step precondition type is unsupported",
                    details={"step_id": step.step_id, "type": kind},
                )
            )
    return problems
|
||||
|
||||
|
||||
def _resolve_source_asset_ids(
    step: WorkflowStepDefinition,
    run: WorkflowRun,
) -> tuple[tuple[str, ...], list[Diagnostic]]:
    """Resolve the ``source_asset_ids`` input of ``step`` into concrete asset IDs.

    Returns:
        ``(asset_ids, [])`` on success, or ``((), [diagnostic])`` when the
        resolved binding cannot be normalised into asset IDs.
    """
    binding = _resolve_value(step.inputs.get("source_asset_ids", ()), run)
    try:
        asset_ids = _normalise_asset_ids(binding)
    except ValueError as exc:
        failure = Diagnostic(
            severity="error",
            code="workflow.source_asset_resolution_failed",
            message="Workflow step source assets could not be resolved",
            details={"step_id": step.step_id, "error": str(exc), "binding": binding},
        )
        return (), [failure]
    return asset_ids, []
|
||||
|
||||
|
||||
def _resolve_value(value: Any, run: WorkflowRun) -> Any:
    """Substitute ``$inputs.*`` and ``$steps.*`` references inside ``value``.

    Strings of the form ``$inputs.<name>`` resolve to the run's input binding
    (``None`` when absent). Strings of the form ``$steps.<step_id>.<attr>``
    resolve to ``output_asset_ids``, ``output_asset_id`` (the first output,
    if any) or ``transformation_run_id`` of that step run; any other
    ``$steps.`` reference resolves to ``None``. Lists, tuples and dicts are
    resolved recursively; everything else is returned unchanged.
    """
    if isinstance(value, str):
        if value.startswith("$inputs."):
            return run.input_bindings.get(value.removeprefix("$inputs."))
        if not value.startswith("$steps."):
            return value
        segments = value.split(".")
        if len(segments) != 3:
            return None
        _, step_id, attribute = segments
        referenced = {item.step_id: item for item in run.step_runs}.get(step_id)
        if referenced is None:
            return None
        if attribute == "output_asset_ids":
            return referenced.output_asset_ids
        if attribute == "output_asset_id":
            produced = referenced.output_asset_ids
            return produced[0] if produced else None
        if attribute == "transformation_run_id":
            return referenced.transformation_run_id
        return None
    if isinstance(value, list):
        return [_resolve_value(element, run) for element in value]
    if isinstance(value, tuple):
        return tuple(_resolve_value(element, run) for element in value)
    if isinstance(value, dict):
        return {name: _resolve_value(element, run) for name, element in value.items()}
    return value
|
||||
|
||||
|
||||
def _normalise_asset_ids(value: Any) -> tuple[str, ...]:
    """Flatten an asset binding into a tuple of asset IDs.

    Accepts ``None`` (no assets), a single ID string, a dict carrying
    ``asset_id`` or ``asset_ids`` (``asset_id`` wins when both are present),
    or an arbitrarily nested list/tuple of those.

    Raises:
        ValueError: A dict has neither key, or the value is of another type.
    """
    if value is None:
        return ()
    if isinstance(value, str):
        return (value,)
    if isinstance(value, dict):
        for key in ("asset_id", "asset_ids"):
            if key in value:
                return _normalise_asset_ids(value[key])
        raise ValueError("asset binding dictionary must include asset_id or asset_ids")
    if isinstance(value, (list, tuple)):
        return tuple(
            asset_id
            for element in value
            for asset_id in _normalise_asset_ids(element)
        )
    raise ValueError(f"unsupported asset binding type: {type(value).__name__}")
|
||||
|
||||
|
||||
def _finalize_run(run: WorkflowRun) -> WorkflowRun:
    """Derive the final run state from the states of its step runs.

    * No steps, or all steps completed: the run is completed.
    * Any step failed, canceled or skipped: the run is partially completed
      when completed steps produced outputs, otherwise failed.
    * Anything else: the run is waiting.

    Outputs are gathered from completed steps; diagnostics from failed,
    skipped, canceled and waiting steps, both in step order.
    """
    produced: list[Any] = []
    notes: list[Any] = []
    for step_run in run.step_runs:
        if step_run.status == WorkflowStepRunStatus.COMPLETED:
            produced.extend(step_run.output_asset_ids)
        elif step_run.status in (
            WorkflowStepRunStatus.FAILED,
            WorkflowStepRunStatus.SKIPPED,
            WorkflowStepRunStatus.CANCELED,
            WorkflowStepRunStatus.WAITING,
        ):
            notes.extend(step_run.diagnostics)
    output_asset_ids = tuple(produced)
    diagnostics = tuple(notes)

    statuses = {step_run.status for step_run in run.step_runs}
    if not run.step_runs or statuses == {WorkflowStepRunStatus.COMPLETED}:
        return run.completed(output_asset_ids=output_asset_ids)

    ended_badly = (
        WorkflowStepRunStatus.FAILED in statuses
        or WorkflowStepRunStatus.CANCELED in statuses
        or WorkflowStepRunStatus.SKIPPED in statuses
    )
    if not ended_badly:
        return run.waiting(diagnostics)
    if output_asset_ids:
        return run.partially_completed(output_asset_ids=output_asset_ids, diagnostics=diagnostics)
    return run.failed(diagnostics)
|
||||
|
||||
|
||||
def _workflow_completion_operation(run: WorkflowRun) -> str:
    """Return the audit operation name matching the final status of ``run``.

    Any status other than completed, partially completed or waiting maps
    to ``"workflow.run.failed"``.
    """
    named_outcomes = (
        (WorkflowRunStatus.COMPLETED, "workflow.run.completed"),
        (WorkflowRunStatus.PARTIALLY_COMPLETED, "workflow.run.partially_completed"),
        (WorkflowRunStatus.WAITING, "workflow.run.waiting"),
    )
    for status, operation in named_outcomes:
        if run.status == status:
            return operation
    return "workflow.run.failed"
|
||||
|
||||
|
||||
def _workflow_audit_outcome(run: WorkflowRun) -> AuditOutcome:
    """Return the audit outcome matching the final status of ``run``.

    Completed maps to SUCCESS, partially completed to PARTIAL, waiting to
    REVIEW_REQUIRED, and every other status to FAILED.
    """
    outcome_by_status = (
        (WorkflowRunStatus.COMPLETED, AuditOutcome.SUCCESS),
        (WorkflowRunStatus.PARTIALLY_COMPLETED, AuditOutcome.PARTIAL),
        (WorkflowRunStatus.WAITING, AuditOutcome.REVIEW_REQUIRED),
    )
    for status, outcome in outcome_by_status:
        if run.status == status:
            return outcome
    return AuditOutcome.FAILED
|
||||
|
||||
|
||||
def _permission_diagnostic(decision: PolicyDecision) -> Diagnostic:
    """Build the error diagnostic reported when policy denies a workflow operation."""
    denial_details = {"policy_decision": decision.to_dict()}
    return Diagnostic(
        severity="error",
        code="workflow.permission_denied",
        message="Workflow operation denied by policy",
        details=denial_details,
    )
|
||||
|
||||
|
||||
def _diagnostic_dict(diagnostic: Diagnostic) -> dict[str, Any]:
    """Serialise a diagnostic into the plain-dict form attached to runs and step runs."""
    serialised = diagnostic.to_dict()
    return serialised
|
||||
Reference in New Issue
Block a user