Workflow layer: gates, decisions, lineage audits, workflow test

This commit is contained in:
2026-05-06 18:54:55 +02:00
parent 3b5f96e159
commit f4f77b2eeb
11 changed files with 1037 additions and 25 deletions

View File

@@ -32,7 +32,12 @@ from .transformation_service import (
TransformationService,
default_transformation_registry,
)
from .workflow_service import WorkflowInvocation, WorkflowRunResult, WorkflowService
from .workflow_service import (
WorkflowInvocation,
WorkflowRunReconstruction,
WorkflowRunResult,
WorkflowService,
)
__all__ = [
"AssetChangeResult",
@@ -62,6 +67,7 @@ __all__ = [
"TransformationRunResult",
"TransformationService",
"WorkflowInvocation",
"WorkflowRunReconstruction",
"WorkflowRunResult",
"WorkflowService",
"default_transformation_registry",

View File

@@ -515,6 +515,20 @@ class TransformationService:
metadata_delta={"lineage_id": lineage.lineage_id, "operation_id": run.operation_id},
)
self.repository.save_derived_lineage(lineage)
self._audit(
"derived_artifact.lineage.linked",
f"asset:{output_asset_id}",
AuditOutcome.SUCCESS,
context,
decision,
details={
"lineage_id": lineage.lineage_id,
"transformation_run_id": run.run_id,
"source_asset_ids": list(lineage.source_asset_ids),
"source_version_ids": list(lineage.source_version_ids),
"output_representation_id": lineage.output_representation_id,
},
)
return asset_change, output_representation, lineage
def _authorize(

View File

@@ -8,8 +8,16 @@ from typing import Any
from kontextual_engine.core import (
AuditEvent,
AuditOutcome,
DerivedArtifactLineage,
OperationContext,
PolicyDecision,
TransformationRun,
WorkflowExceptionKind,
WorkflowExceptionRecord,
WorkflowExceptionStatus,
WorkflowReviewDecisionType,
WorkflowReviewStatus,
WorkflowReviewTask,
WorkflowRun,
WorkflowRunStatus,
WorkflowStepDefinition,
@@ -63,6 +71,35 @@ class WorkflowRunResult:
}
@dataclass(frozen=True)
class WorkflowRunReconstruction:
run: WorkflowRun
template: WorkflowTemplate
audit_events: tuple[AuditEvent, ...] = ()
transformation_runs: tuple[TransformationRun, ...] = ()
derived_lineage: tuple[DerivedArtifactLineage, ...] = ()
review_tasks: tuple[WorkflowReviewTask, ...] = ()
exceptions: tuple[WorkflowExceptionRecord, ...] = ()
def __post_init__(self) -> None:
object.__setattr__(self, "audit_events", tuple(self.audit_events))
object.__setattr__(self, "transformation_runs", tuple(self.transformation_runs))
object.__setattr__(self, "derived_lineage", tuple(self.derived_lineage))
object.__setattr__(self, "review_tasks", tuple(self.review_tasks))
object.__setattr__(self, "exceptions", tuple(self.exceptions))
def to_dict(self) -> dict[str, Any]:
return {
"run": self.run.to_dict(),
"template": self.template.to_dict(),
"audit_events": [item.to_dict() for item in self.audit_events],
"transformation_runs": [item.to_dict() for item in self.transformation_runs],
"derived_lineage": [item.to_dict() for item in self.derived_lineage],
"review_tasks": [item.to_dict() for item in self.review_tasks],
"exceptions": [item.to_dict() for item in self.exceptions],
}
class WorkflowService:
def __init__(
self,
@@ -127,6 +164,41 @@ class WorkflowService:
def list_templates(self, *, template_id: str | None = None) -> tuple[WorkflowTemplate, ...]:
return tuple(self.repository.list_workflow_templates(template_id=template_id))
def list_review_tasks(
self,
*,
status: WorkflowReviewStatus | str | None = WorkflowReviewStatus.OPEN,
workflow_run_id: str | None = None,
) -> tuple[WorkflowReviewTask, ...]:
status = WorkflowReviewStatus(status) if status is not None else None
reviews = [
review
for run in self.repository.list_workflow_runs(template_id=None)
if workflow_run_id is None or run.run_id == workflow_run_id
for review in run.review_tasks
if status is None or review.status == status
]
return tuple(sorted(reviews, key=lambda item: (item.requested_at, item.review_id)))
def list_exception_queue(
self,
*,
status: WorkflowExceptionStatus | str | None = WorkflowExceptionStatus.OPEN,
kind: WorkflowExceptionKind | str | None = None,
workflow_run_id: str | None = None,
) -> tuple[WorkflowExceptionRecord, ...]:
status = WorkflowExceptionStatus(status) if status is not None else None
kind = WorkflowExceptionKind(kind) if kind is not None else None
exceptions = [
exception
for run in self.repository.list_workflow_runs(template_id=None)
if workflow_run_id is None or run.run_id == workflow_run_id
for exception in run.exceptions
if (status is None or exception.status == status)
and (kind is None or exception.kind == kind)
]
return tuple(sorted(exceptions, key=lambda item: (item.created_at, item.exception_id)))
def queue_template(self, invocation: WorkflowInvocation, context: OperationContext) -> WorkflowRunResult:
template = self.repository.get_workflow_template(
invocation.template_id,
@@ -219,6 +291,17 @@ class WorkflowService:
details={"run_id": run_id, "status": run.status.value},
)
return WorkflowRunResult(run=run, success=False, diagnostics=(diagnostic,))
if run.open_review_tasks:
diagnostic = Diagnostic(
severity="warning",
code="workflow.review_open",
message="Workflow run is waiting for open review tasks",
details={
"run_id": run_id,
"review_ids": [item.review_id for item in run.open_review_tasks],
},
)
return WorkflowRunResult(run=run, success=False, diagnostics=(diagnostic,))
template = self.repository.get_workflow_template(run.template_id, version=run.template_version)
return self._execute_run(template, run, context)
@@ -297,6 +380,189 @@ class WorkflowService:
)
return canceled
def record_review_decision(
self,
run_id: str,
review_id: str,
decision: WorkflowReviewDecisionType | str,
context: OperationContext,
*,
note: str = "",
correction: dict[str, Any] | None = None,
) -> WorkflowRunResult:
run = self.repository.get_workflow_run(run_id)
review = _find_review(run, review_id)
if review.status != WorkflowReviewStatus.OPEN:
diagnostic = Diagnostic(
severity="error",
code="workflow.review_not_open",
message="Workflow review task is not open",
details={"run_id": run_id, "review_id": review_id, "status": review.status.value},
)
return WorkflowRunResult(run=run, success=False, diagnostics=(diagnostic,))
decision = WorkflowReviewDecisionType(decision)
policy_decision = self._authorize(
context,
"workflow.review.decide",
f"workflow_review:{review_id}",
resource_metadata={
"workflow_run_id": run_id,
"step_id": review.step_id,
"decision": decision.value,
"correction": correction or {},
},
)
if not policy_decision.allowed:
self._audit(
"workflow.review.decide",
f"workflow_review:{review_id}",
AuditOutcome.DENIED,
context,
policy_decision,
details={"workflow_run_id": run_id, "decision": decision.value},
)
raise AuthorizationError(
"Operation denied by policy",
details={
"action": "workflow.review.decide",
"resource": f"workflow_review:{review_id}",
"correlation_id": context.correlation_id,
"policy_decision": policy_decision.to_dict(),
},
)
updated_review = review.decide(
decision,
actor_id=context.actor.id,
note=note,
correction=correction,
)
run = run.with_review_task(updated_review)
exception = _exception_for_review(run, review_id)
if exception is not None:
if decision == WorkflowReviewDecisionType.ESCALATE:
exception = exception.escalate(actor_id=context.actor.id, resolution=note or decision.value)
else:
exception = exception.resolve(actor_id=context.actor.id, resolution=note or decision.value)
run = run.with_exception(exception)
self._audit(
f"workflow.review.{decision.value}",
f"workflow_review:{review_id}",
AuditOutcome.REVIEW_REQUIRED
if decision == WorkflowReviewDecisionType.ESCALATE
else AuditOutcome.SUCCESS,
context,
policy_decision,
details={
"workflow_run_id": run_id,
"step_id": review.step_id,
"decision": decision.value,
"exception_id": exception.exception_id if exception else None,
"correction": correction or {},
},
)
step = _find_step_run(run, review.step_id)
if decision in (WorkflowReviewDecisionType.CONTINUE, WorkflowReviewDecisionType.CORRECT):
completed_step = step.completed(
transformation_run_id=step.transformation_run_id,
output_asset_ids=step.output_asset_ids,
)
run = run.with_step_run(completed_step)
self.repository.save_workflow_run(run)
return self.resume_run(run.run_id, context)
if decision == WorkflowReviewDecisionType.RETRY:
self.repository.save_workflow_run(run)
return self.retry_run(run.run_id, context)
if decision == WorkflowReviewDecisionType.ESCALATE:
waiting = run.waiting(
(
_diagnostic_dict(
Diagnostic(
severity="warning",
code="workflow.review_escalated",
message="Workflow review was escalated",
details={"review_id": review_id, "note": note},
)
),
)
)
self.repository.save_workflow_run(waiting)
return WorkflowRunResult(run=waiting, success=False, policy_decision=policy_decision)
diagnostic = Diagnostic(
severity="error",
code="workflow.review_rejected",
message="Workflow output was rejected during review",
details={"review_id": review_id, "note": note},
)
failed_step = step.failed((_diagnostic_dict(diagnostic),))
run = run.with_step_run(failed_step)
final = _finalize_run(run)
self.repository.save_workflow_run(final)
event = self._audit(
_workflow_completion_operation(final),
f"workflow_run:{final.run_id}",
_workflow_audit_outcome(final),
context,
policy_decision,
details={
"status": final.status.value,
"review_id": review_id,
"diagnostics": list(final.diagnostics),
},
)
return WorkflowRunResult(
run=final,
success=False,
diagnostics=tuple(Diagnostic(**item) for item in final.diagnostics),
audit_event=event,
policy_decision=policy_decision,
)
def reconstruct_run(self, run_id: str) -> WorkflowRunReconstruction:
run = self.repository.get_workflow_run(run_id)
template = self.repository.get_workflow_template(run.template_id, version=run.template_version)
transformation_runs: list[TransformationRun] = []
for step in run.step_runs:
if step.transformation_run_id:
transformation_runs.append(self.repository.get_transformation_run(step.transformation_run_id))
derived_lineage: list[DerivedArtifactLineage] = []
for output_asset_id in run.output_asset_ids:
derived_lineage.extend(self.repository.list_derived_lineage(output_asset_id=output_asset_id))
for step in run.step_runs:
for output_asset_id in step.output_asset_ids:
derived_lineage.extend(self.repository.list_derived_lineage(output_asset_id=output_asset_id))
lineage_by_id = {item.lineage_id: item for item in derived_lineage}
transformation_targets = {
f"transformation_run:{item.run_id}"
for item in transformation_runs
}
workflow_targets = {f"workflow_run:{run.run_id}"}
workflow_targets.update(f"workflow_run:{run.run_id}:step:{item.step_id}" for item in run.step_runs)
workflow_targets.update(f"workflow_review:{item.review_id}" for item in run.review_tasks)
asset_targets = {
f"asset:{output_asset_id}"
for lineage in lineage_by_id.values()
for output_asset_id in (lineage.output_asset_id,)
}
relevant_targets = workflow_targets | transformation_targets | asset_targets
audit_events = tuple(
event
for event in self.repository.list_audit_events(correlation_id=run.correlation_id)
if event.target in relevant_targets
)
return WorkflowRunReconstruction(
run=run,
template=template,
audit_events=audit_events,
transformation_runs=tuple(transformation_runs),
derived_lineage=tuple(lineage_by_id.values()),
review_tasks=run.review_tasks,
exceptions=run.exceptions,
)
def _execute_run(
self,
template: WorkflowTemplate,
@@ -329,6 +595,11 @@ class WorkflowService:
WorkflowStepRunStatus.CANCELED,
):
continue
if current.status == WorkflowStepRunStatus.WAITING and _step_has_open_review(
run,
step.step_id,
):
continue
if current.status == WorkflowStepRunStatus.FAILED and current.step_id in attempted_step_ids:
continue
dependency_diagnostic = _dependency_diagnostic(step, step_runs_by_id)
@@ -355,14 +626,73 @@ class WorkflowService:
executed, step_result = self._execute_step(step, run, context)
if step_result is not None:
step_results.append(step_result)
if executed.status == WorkflowStepRunStatus.COMPLETED:
review_task, exception = _review_gate_records(step, run, executed, context)
if review_task is not None and exception is not None:
review_task = replace(review_task, exception_id=exception.exception_id)
exception = replace(exception, review_id=review_task.review_id)
diagnostic = Diagnostic(
severity="warning",
code="workflow.review_required",
message="Workflow step output requires review before the run can continue",
details={
"step_id": step.step_id,
"review_id": review_task.review_id,
"exception_id": exception.exception_id,
"queue": review_task.queue,
},
)
executed = replace(
executed,
status=WorkflowStepRunStatus.WAITING,
diagnostics=executed.diagnostics + (_diagnostic_dict(diagnostic),),
completed_at=None,
)
run = run.with_review_task(review_task).with_exception(exception)
self.repository.save_workflow_run(run)
self._audit(
"workflow.review.requested",
f"workflow_review:{review_task.review_id}",
AuditOutcome.REVIEW_REQUIRED,
context,
decision,
details={
"workflow_run_id": run.run_id,
"step_id": step.step_id,
"exception_id": exception.exception_id,
"output_asset_ids": list(review_task.output_asset_ids),
},
)
self._audit(
"workflow.exception.opened",
f"workflow_run:{run.run_id}",
AuditOutcome.REVIEW_REQUIRED,
context,
decision,
details=exception.to_dict(),
)
if executed.status == WorkflowStepRunStatus.FAILED and step.failure_behavior == "continue":
executed = replace(executed, status=WorkflowStepRunStatus.SKIPPED)
if executed.status in (WorkflowStepRunStatus.FAILED, WorkflowStepRunStatus.SKIPPED):
exception = _step_exception_record(step, run, executed)
run = run.with_exception(exception)
self.repository.save_workflow_run(run)
self._audit(
"workflow.exception.opened",
f"workflow_run:{run.run_id}",
AuditOutcome.FAILED,
context,
decision,
details=exception.to_dict(),
)
run = run.with_step_run(executed)
self.repository.save_workflow_run(run)
step_audit_operation = _step_audit_operation(executed)
step_audit_outcome = _step_audit_outcome(executed)
self._audit(
"workflow.step.completed" if executed.status == WorkflowStepRunStatus.COMPLETED else "workflow.step.failed",
step_audit_operation,
f"workflow_run:{run.run_id}:step:{step.step_id}",
AuditOutcome.SUCCESS if executed.status == WorkflowStepRunStatus.COMPLETED else AuditOutcome.FAILED,
step_audit_outcome,
context,
decision,
details={
@@ -372,6 +702,8 @@ class WorkflowService:
},
)
progress = True
if executed.status == WorkflowStepRunStatus.WAITING:
continue
if executed.status == WorkflowStepRunStatus.FAILED and step.failure_behavior != "continue":
progress = False
break
@@ -538,6 +870,122 @@ def _reset_step_run(step_run: WorkflowStepRun) -> WorkflowStepRun:
return WorkflowStepRun(step_id=step_run.step_id, operation_id=step_run.operation_id)
def _find_review(run: WorkflowRun, review_id: str) -> WorkflowReviewTask:
for review in run.review_tasks:
if review.review_id == review_id:
return review
raise NotFoundError("Workflow review task not found", details={"run_id": run.run_id, "review_id": review_id})
def _find_step_run(run: WorkflowRun, step_id: str) -> WorkflowStepRun:
for step_run in run.step_runs:
if step_run.step_id == step_id:
return step_run
raise NotFoundError("Workflow step run not found", details={"run_id": run.run_id, "step_id": step_id})
def _exception_for_review(run: WorkflowRun, review_id: str) -> WorkflowExceptionRecord | None:
for exception in run.exceptions:
if exception.review_id == review_id:
return exception
return None
def _step_has_open_review(run: WorkflowRun, step_id: str) -> bool:
return any(
review.step_id == step_id and review.status == WorkflowReviewStatus.OPEN
for review in run.review_tasks
)
def _review_gate_records(
step: WorkflowStepDefinition,
run: WorkflowRun,
step_run: WorkflowStepRun,
context: OperationContext,
) -> tuple[WorkflowReviewTask | None, WorkflowExceptionRecord | None]:
gate = dict(step.review_gate)
if not gate.get("required") and not step.metadata.get("review_required"):
return None, None
reason = str(gate.get("reason") or step.metadata.get("review_reason") or "Workflow step output requires review")
queue = str(gate.get("queue") or step.metadata.get("review_queue") or "human-review")
assigned_to = gate.get("assigned_to") or step.metadata.get("review_assignee")
review = WorkflowReviewTask(
workflow_run_id=run.run_id,
step_id=step.step_id,
reason=reason,
requested_by=context.actor.id,
output_asset_ids=step_run.output_asset_ids,
queue=queue,
assigned_to=assigned_to,
diagnostics=step_run.diagnostics,
)
exception = WorkflowExceptionRecord(
workflow_run_id=run.run_id,
kind=WorkflowExceptionKind.REVIEW_REQUIRED,
message=reason,
step_id=step.step_id,
diagnostics=step_run.diagnostics,
output_asset_ids=step_run.output_asset_ids,
assigned_to=assigned_to,
)
return review, exception
def _step_exception_record(
step: WorkflowStepDefinition,
run: WorkflowRun,
step_run: WorkflowStepRun,
) -> WorkflowExceptionRecord:
kind = _exception_kind_for_diagnostics(step_run.diagnostics)
message = "Workflow step failed"
if step_run.status == WorkflowStepRunStatus.SKIPPED:
message = "Workflow step was skipped after a recoverable failure"
if step_run.diagnostics:
message = str(step_run.diagnostics[-1].get("message") or message)
return WorkflowExceptionRecord(
workflow_run_id=run.run_id,
kind=kind,
message=message,
step_id=step.step_id,
diagnostics=step_run.diagnostics,
output_asset_ids=step_run.output_asset_ids,
)
def _exception_kind_for_diagnostics(diagnostics: tuple[dict[str, Any], ...]) -> WorkflowExceptionKind:
codes = {str(item.get("code", "")) for item in diagnostics}
if any("permission_denied" in code or "policy" in code for code in codes):
return WorkflowExceptionKind.POLICY_CONFLICT
if any("low_confidence" in code for code in codes):
return WorkflowExceptionKind.LOW_CONFIDENCE
if any("dependency" in code or "blocked" in code or "precondition" in code for code in codes):
return WorkflowExceptionKind.BLOCKED
return WorkflowExceptionKind.FAILED
def _step_audit_operation(step_run: WorkflowStepRun) -> str:
if step_run.status == WorkflowStepRunStatus.COMPLETED:
return "workflow.step.completed"
if step_run.status == WorkflowStepRunStatus.WAITING:
return "workflow.step.waiting"
if step_run.status == WorkflowStepRunStatus.SKIPPED:
return "workflow.step.skipped"
if step_run.status == WorkflowStepRunStatus.CANCELED:
return "workflow.step.canceled"
return "workflow.step.failed"
def _step_audit_outcome(step_run: WorkflowStepRun) -> AuditOutcome:
if step_run.status == WorkflowStepRunStatus.COMPLETED:
return AuditOutcome.SUCCESS
if step_run.status == WorkflowStepRunStatus.WAITING:
return AuditOutcome.REVIEW_REQUIRED
if step_run.status == WorkflowStepRunStatus.SKIPPED:
return AuditOutcome.PARTIAL
return AuditOutcome.FAILED
def _template_diagnostics(template: WorkflowTemplate) -> tuple[Diagnostic, ...]:
diagnostics: list[Diagnostic] = []
step_ids = [step.step_id for step in template.steps]