Wire instruction report execution

This commit is contained in:
2026-05-19 18:28:23 +02:00
parent 0e7084207e
commit 0dc342eb1b
7 changed files with 513 additions and 16 deletions

View File

@@ -11,6 +11,8 @@ import hashlib
import json
import logging
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from activity_core.rules.evaluator import UnsafeExpression, evaluate_condition
@@ -26,6 +28,19 @@ class UntrustedFieldError(ValueError):
"""Raised when a prompt placeholder references a field not in trusted_fields."""
@dataclass
class InstructionResult:
    """Outcome of one instruction run together with its audit metadata.

    Only ``tasks`` is mandatory; every audit field carries a default so a
    result can be built from the task list alone.
    """

    # Task specs produced by the instruction (empty when nothing was produced).
    tasks: list[TaskSpec]
    # Optional report payload returned alongside, or instead of, the tasks.
    report: dict[str, Any] | None = None
    # Prompt hash recorded for the run, when one is available.
    # NOTE(review): how the hash is computed is not visible here — confirm.
    prompt_hash: str | None = None
    # Model name taken from the instruction.
    model: str | None = None
    # True only when the LLM output passed validation.
    output_validated: bool = False
    # Mirrors the instruction's ``review_required`` flag.
    review_required: bool = False
    # The instruction's condition expression, or None when it has none.
    condition_matched: str | None = None
def _resolve_path(obj: Any, path: str) -> Any:
"""Walk a dot-separated path on obj or dict. Returns None if not found."""
parts = path.split(".")
@@ -92,14 +107,24 @@ def execute_instruction(
4. Validate response against instr.output_schema (JSON Schema). Retry once.
5. Return list[TaskSpec].
"""
return execute_instruction_with_audit(instr, event, context, llm_client).tasks
def execute_instruction_with_audit(
    instr: Any,
    event: Any,
    context: dict,
    llm_client: Any,
) -> InstructionResult:
    """Evaluate an Instruction and return task specs plus audit metadata.

    Delegates to ``_execute``. Exceptions raised there are logged as warnings
    and converted into an empty ``InstructionResult`` (via ``_empty_result``)
    so callers always receive the same result type.

    Args:
        instr: Instruction to run; its ``id`` is used in log messages.
        event: Event passed through to ``_execute``.
        context: Context mapping passed through to ``_execute``.
        llm_client: LLM client passed through to ``_execute``.

    Returns:
        The result from ``_execute``, or an empty result on failure.
    """
    try:
        return _execute(instr, event, context, llm_client)
    except UntrustedFieldError as exc:
        # Policy violation while rendering the prompt: reject, do not crash.
        logger.warning("instruction %r rejected — %s", instr.id, exc)
        return _empty_result(instr)
    except Exception as exc:
        # Top-level boundary: any other failure degrades to "no tasks".
        logger.warning("instruction %r failed — %s", instr.id, exc)
        return _empty_result(instr)
def _execute(
@@ -107,14 +132,14 @@ def _execute(
event: Any,
context: dict,
llm_client: Any,
) -> list[TaskSpec]:
) -> InstructionResult:
# Step 1 — pre-filter
try:
if instr.condition and not evaluate_condition(instr.condition, event, context):
return []
return _empty_result(instr)
except UnsafeExpression as exc:
logger.warning("instruction %r condition is unsafe — %s", instr.id, exc)
return []
return _empty_result(instr)
# Step 2 — render prompt (raises UntrustedFieldError on policy violation)
rendered = _render_prompt(instr.prompt, instr.trusted_fields, event, context)
@@ -124,34 +149,87 @@ def _execute(
raw_output = llm_client.complete(rendered, model=instr.model)
# Step 4 — validate and optionally retry
task_specs, error = _validate_output(raw_output, instr)
task_specs, report, error = _validate_output(raw_output, instr)
if error:
retry_prompt = rendered + f"\n\nPrevious output was invalid: {error}\nPlease fix."
raw_output = llm_client.complete(retry_prompt, model=instr.model)
task_specs, error = _validate_output(raw_output, instr)
task_specs, report, error = _validate_output(raw_output, instr)
if error:
logger.warning(
"instruction_output_error: instruction=%r, prompt_hash=%s, error=%s",
instr.id, prompt_hash, error,
)
return []
return _empty_result(instr, prompt_hash=prompt_hash)
return task_specs
return InstructionResult(
tasks=task_specs,
report=report,
prompt_hash=prompt_hash,
model=instr.model,
output_validated=True,
review_required=bool(getattr(instr, "review_required", False)),
condition_matched=instr.condition or None,
)
def _validate_output(raw_output: Any, instr: Any) -> tuple[list[TaskSpec], str | None]:
"""Parse raw LLM output into TaskSpec list. Returns (specs, error_message)."""
def _empty_result(instr: Any, prompt_hash: str | None = None) -> InstructionResult:
    """Build a task-less, unvalidated result for an instruction.

    Audit fields are read defensively with ``getattr`` so an instruction
    object missing ``model``, ``review_required`` or ``condition`` still
    yields a well-formed result.
    """
    model_name = getattr(instr, "model", None)
    needs_review = bool(getattr(instr, "review_required", False))
    # An empty/falsy condition is recorded as None rather than "".
    condition = getattr(instr, "condition", "") or None
    return InstructionResult(
        tasks=[],
        prompt_hash=prompt_hash,
        model=model_name,
        output_validated=False,
        review_required=needs_review,
        condition_matched=condition,
    )
def _validate_output(
raw_output: Any,
instr: Any,
) -> tuple[list[TaskSpec], dict[str, Any] | None, str | None]:
"""Parse raw LLM output into TaskSpecs and optional report payload.
Accepted shapes:
- list[task]
- single task dict with title/description/etc.
- {"tasks": [...], "report": {...}}
- report-only dict, such as {"summary": "...", "recommendations": [...]}
Returns (specs, report, error_message).
"""
try:
if isinstance(raw_output, str):
data = json.loads(raw_output)
else:
data = raw_output
if not isinstance(data, list):
data = [data]
schema_error = _validate_against_schema(data, getattr(instr, "output_schema", ""))
if schema_error:
return [], None, schema_error
report: dict[str, Any] | None = None
task_items: list[Any]
if isinstance(data, dict) and ("tasks" in data or "report" in data):
maybe_report = data.get("report")
if maybe_report is not None and not isinstance(maybe_report, dict):
return [], None, "report must be a JSON object"
report = maybe_report
tasks = data.get("tasks", [])
if not isinstance(tasks, list):
return [], None, "tasks must be a JSON array"
task_items = tasks
elif isinstance(data, dict) and "title" not in data:
report = data
task_items = []
elif isinstance(data, list):
task_items = data
else:
task_items = [data]
specs = []
for item in data:
for item in task_items:
if not isinstance(item, dict):
return [], None, "each task must be a JSON object"
specs.append(TaskSpec(
title=item.get("title", ""),
description=item.get("description", ""),
@@ -162,6 +240,70 @@ def _validate_output(raw_output: Any, instr: Any) -> tuple[list[TaskSpec], str |
source_type="instruction",
source_id=instr.id,
))
return specs, None
return specs, report, None
except (json.JSONDecodeError, AttributeError, KeyError, TypeError) as exc:
return [], str(exc)
return [], None, str(exc)
def _validate_against_schema(data: Any, schema_path: str) -> str | None:
if not schema_path:
return None
path = Path(schema_path)
if not path.exists():
return None
try:
schema = json.loads(path.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError) as exc:
return f"could not read output schema: {exc}"
return _validate_schema_node(data, schema, "$")
def _validate_schema_node(data: Any, schema: dict[str, Any], path: str) -> str | None:
expected_type = schema.get("type")
if expected_type and not _matches_type(data, expected_type):
return f"{path}: expected {expected_type}"
if expected_type == "object":
required = schema.get("required", [])
if isinstance(required, list):
for key in required:
if isinstance(key, str) and key not in data:
return f"{path}: missing required property {key!r}"
properties = schema.get("properties", {})
if isinstance(properties, dict):
for key, child_schema in properties.items():
if key in data and isinstance(child_schema, dict):
error = _validate_schema_node(data[key], child_schema, f"{path}.{key}")
if error:
return error
if expected_type == "array":
item_schema = schema.get("items")
if isinstance(item_schema, dict):
for index, item in enumerate(data):
error = _validate_schema_node(item, item_schema, f"{path}[{index}]")
if error:
return error
return None
def _matches_type(data: Any, expected_type: str) -> bool:
if expected_type == "object":
return isinstance(data, dict)
if expected_type == "array":
return isinstance(data, list)
if expected_type == "string":
return isinstance(data, str)
if expected_type == "integer":
return isinstance(data, int) and not isinstance(data, bool)
if expected_type == "number":
return isinstance(data, (int, float)) and not isinstance(data, bool)
if expected_type == "boolean":
return isinstance(data, bool)
if expected_type == "null":
return data is None
return True