Wire instruction report execution

2026-05-19 18:28:23 +02:00
parent 0e7084207e
commit 0dc342eb1b
7 changed files with 513 additions and 16 deletions
--- a/src/activity_core/rules/executor.py
+++ b/src/activity_core/rules/executor.py
@@ -11,6 +11,8 @@ import hashlib
 import json
 import logging
 import re
+from dataclasses import dataclass
+from pathlib import Path
 from typing import Any

 from activity_core.rules.evaluator import UnsafeExpression, evaluate_condition
@@ -26,6 +28,19 @@ class UntrustedFieldError(ValueError):
    """Raised when a prompt placeholder references a field not in trusted_fields."""


+@dataclass
+class InstructionResult:
+    """Instruction output plus audit metadata for workflow integration.
+
+    Only ``tasks`` is required; every audit field has a default so callers
+    can build a result from whatever information is available.
+    """
+
+    # Task specs parsed from the LLM output; empty when nothing was produced.
+    tasks: list[TaskSpec]
+    # Optional report payload (a JSON object) parsed alongside the tasks.
+    report: dict[str, Any] | None = None
+    # Prompt hash as logged by the executor; None when no hash was attached.
+    prompt_hash: str | None = None
+    # Model name copied from the instruction's ``model`` attribute.
+    model: str | None = None
+    # True only when the LLM output was parsed and validated successfully.
+    output_validated: bool = False
+    # Mirrors the instruction's ``review_required`` attribute (False if absent).
+    review_required: bool = False
+    # The instruction's condition expression, or None when it is empty.
+    condition_matched: str | None = None
+
+
 def _resolve_path(obj: Any, path: str) -> Any:
    """Walk a dot-separated path on obj or dict. Returns None if not found."""
    parts = path.split(".")
@@ -92,14 +107,24 @@ def execute_instruction(
      4. Validate response against instr.output_schema (JSON Schema). Retry once.
      5. Return list[TaskSpec].
    """
+    return execute_instruction_with_audit(instr, event, context, llm_client).tasks
+
+
+def execute_instruction_with_audit(
+    instr: Any,
+    event: Any,
+    context: dict,
+    llm_client: Any,
+) -> InstructionResult:
+    """Evaluate an Instruction and return task specs plus audit metadata.
+
+    Delegates to ``_execute``. Failures inside ``_execute`` are not
+    propagated: ``UntrustedFieldError`` is logged as a rejection, any other
+    ``Exception`` is logged as a failure, and in both cases the caller
+    receives ``_empty_result(instr)`` (no tasks, ``output_validated=False``).
+    """
    try:
        return _execute(instr, event, context, llm_client)
    except UntrustedFieldError as exc:
        logger.warning("instruction %r rejected — %s", instr.id, exc)
-        return []
+        return _empty_result(instr)
    except Exception as exc:
        logger.warning("instruction %r failed — %s", instr.id, exc)
-        return []
+        return _empty_result(instr)


 def _execute(
@@ -107,14 +132,14 @@ def _execute(
    event: Any,
    context: dict,
    llm_client: Any,
-) -> list[TaskSpec]:
+) -> InstructionResult:
    # Step 1 — pre-filter
    try:
        if instr.condition and not evaluate_condition(instr.condition, event, context):
-            return []
+            return _empty_result(instr)
    except UnsafeExpression as exc:
        logger.warning("instruction %r condition is unsafe — %s", instr.id, exc)
-        return []
+        return _empty_result(instr)

    # Step 2 — render prompt (raises UntrustedFieldError on policy violation)
    rendered = _render_prompt(instr.prompt, instr.trusted_fields, event, context)
@@ -124,34 +149,87 @@ def _execute(
    raw_output = llm_client.complete(rendered, model=instr.model)

    # Step 4 — validate and optionally retry
-    task_specs, error = _validate_output(raw_output, instr)
+    task_specs, report, error = _validate_output(raw_output, instr)
    if error:
        retry_prompt = rendered + f"\n\nPrevious output was invalid: {error}\nPlease fix."
        raw_output = llm_client.complete(retry_prompt, model=instr.model)
-        task_specs, error = _validate_output(raw_output, instr)
+        task_specs, report, error = _validate_output(raw_output, instr)
        if error:
            logger.warning(
                "instruction_output_error: instruction=%r, prompt_hash=%s, error=%s",
                instr.id, prompt_hash, error,
            )
-            return []
+            return _empty_result(instr, prompt_hash=prompt_hash)

-    return task_specs
+    return InstructionResult(
+        tasks=task_specs,
+        report=report,
+        prompt_hash=prompt_hash,
+        model=instr.model,
+        output_validated=True,
+        review_required=bool(getattr(instr, "review_required", False)),
+        condition_matched=instr.condition or None,
+    )


-def _validate_output(raw_output: Any, instr: Any) -> tuple[list[TaskSpec], str | None]:
-    """Parse raw LLM output into TaskSpec list. Returns (specs, error_message)."""
+def _empty_result(instr: Any, prompt_hash: str | None = None) -> InstructionResult:
+    """Build the task-less result returned when an instruction yields nothing.
+
+    Audit fields are read from ``instr`` with ``getattr`` defaults, so an
+    instruction object lacking ``model``, ``review_required`` or ``condition``
+    still produces a result. ``output_validated`` is always False here.
+
+    NOTE(review): ``condition_matched`` echoes ``instr.condition`` even when
+    the caller returns early because the condition did NOT match — confirm
+    this is the intended audit semantics.
+    """
+    model_name = getattr(instr, "model", None)
+    needs_review = bool(getattr(instr, "review_required", False))
+    condition = getattr(instr, "condition", "") or None
+    return InstructionResult(
+        tasks=[],
+        prompt_hash=prompt_hash,
+        model=model_name,
+        output_validated=False,
+        review_required=needs_review,
+        condition_matched=condition,
+    )
+
+
+def _validate_output(
+    raw_output: Any,
+    instr: Any,
+) -> tuple[list[TaskSpec], dict[str, Any] | None, str | None]:
+    """Parse raw LLM output into TaskSpecs and optional report payload.
+
+    Accepted shapes:
+      - list[task]
+      - single task dict with title/description/etc.
+      - {"tasks": [...], "report": {...}}
+      - report-only dict, such as {"summary": "...", "recommendations": [...]}
+
+    Returns (specs, report, error_message).
+    """
    try:
        if isinstance(raw_output, str):
            data = json.loads(raw_output)
        else:
            data = raw_output

-        if not isinstance(data, list):
-            data = [data]
+        schema_error = _validate_against_schema(data, getattr(instr, "output_schema", ""))
+        if schema_error:
+            return [], None, schema_error
+
+        report: dict[str, Any] | None = None
+        task_items: list[Any]
+        if isinstance(data, dict) and ("tasks" in data or "report" in data):
+            maybe_report = data.get("report")
+            if maybe_report is not None and not isinstance(maybe_report, dict):
+                return [], None, "report must be a JSON object"
+            report = maybe_report
+            tasks = data.get("tasks", [])
+            if not isinstance(tasks, list):
+                return [], None, "tasks must be a JSON array"
+            task_items = tasks
+        elif isinstance(data, dict) and "title" not in data:
+            report = data
+            task_items = []
+        elif isinstance(data, list):
+            task_items = data
+        else:
+            task_items = [data]

        specs = []
-        for item in data:
+        for item in task_items:
+            if not isinstance(item, dict):
+                return [], None, "each task must be a JSON object"
            specs.append(TaskSpec(
                title=item.get("title", ""),
                description=item.get("description", ""),
@@ -162,6 +240,70 @@ def _validate_output(raw_output: Any, instr: Any) -> tuple[list[TaskSpec], str |
                source_type="instruction",
                source_id=instr.id,
            ))
-        return specs, None
+        return specs, report, None
    except (json.JSONDecodeError, AttributeError, KeyError, TypeError) as exc:
-        return [], str(exc)
+        return [], None, str(exc)
+
+
+def _validate_against_schema(data: Any, schema_path: str) -> str | None:
+    """Validate ``data`` against the JSON Schema file at ``schema_path``.
+
+    Validation is best-effort: an empty path or a missing file disables the
+    check instead of failing the instruction.
+
+    Args:
+        data: Parsed LLM output.
+        schema_path: Filesystem path of a JSON Schema document; may be empty.
+
+    Returns:
+        None when the data is accepted or no schema is available, otherwise a
+        human-readable error message.
+    """
+    if not schema_path:
+        return None
+
+    path = Path(schema_path)
+    if not path.exists():
+        return None
+
+    try:
+        schema = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, json.JSONDecodeError) as exc:
+        return f"could not read output schema: {exc}"
+
+    # A schema document is a JSON object, or a boolean (true accepts every
+    # value, false rejects every value). Anything else would crash the node
+    # validator with AttributeError on ``schema.get``, so report it clearly.
+    if isinstance(schema, bool):
+        return None if schema else "$: output schema rejects every value"
+    if not isinstance(schema, dict):
+        return "output schema must be a JSON object"
+
+    return _validate_schema_node(data, schema, "$")
+
+
+def _validate_schema_node(data: Any, schema: dict[str, Any], path: str) -> str | None:
+    """Check ``data`` against one schema node and recurse into its children.
+
+    Supports the ``type``, ``required``, ``properties`` and ``items``
+    keywords; every other keyword is ignored. ``type`` may be a single name
+    or a list of alternative names.
+
+    Args:
+        data: Value to validate.
+        schema: Schema node (a JSON object).
+        path: Location of ``data`` within the document, used in messages.
+
+    Returns:
+        The first error found as ``"<path>: <problem>"``, or None when valid.
+    """
+    declared = schema.get("type")
+    # JSON Schema lets "type" be one name or a list of alternatives; treating
+    # a list as an unknown name would silently skip the whole subtree.
+    if isinstance(declared, list):
+        allowed = declared
+    elif declared:
+        allowed = [declared]
+    else:
+        allowed = []
+    if allowed and not any(_matches_type(data, name) for name in allowed):
+        expected = " or ".join(str(name) for name in allowed)
+        return f"{path}: expected {expected}"
+
+    # Structural keywords only apply to the container kind actually present.
+    if isinstance(data, dict) and "object" in allowed:
+        required = schema.get("required", [])
+        if isinstance(required, list):
+            for key in required:
+                if isinstance(key, str) and key not in data:
+                    return f"{path}: missing required property {key!r}"
+        properties = schema.get("properties", {})
+        if isinstance(properties, dict):
+            for key, child_schema in properties.items():
+                if key in data and isinstance(child_schema, dict):
+                    error = _validate_schema_node(data[key], child_schema, f"{path}.{key}")
+                    if error:
+                        return error
+
+    if isinstance(data, list) and "array" in allowed:
+        item_schema = schema.get("items")
+        if isinstance(item_schema, dict):
+            for index, item in enumerate(data):
+                error = _validate_schema_node(item, item_schema, f"{path}[{index}]")
+                if error:
+                    return error
+
+    return None
+
+
+def _matches_type(data: Any, expected_type: str) -> bool:
+    """Return True when ``data`` is an instance of JSON Schema ``expected_type``.
+
+    Booleans are never treated as numbers even though ``bool`` subclasses
+    ``int`` in Python. Unknown type names are accepted, so unsupported schema
+    features never reject otherwise usable output.
+    """
+    if expected_type == "object":
+        return isinstance(data, dict)
+    if expected_type == "array":
+        return isinstance(data, list)
+    if expected_type == "string":
+        return isinstance(data, str)
+    if expected_type == "integer":
+        if isinstance(data, bool):
+            return False
+        # JSON Schema counts any number with a zero fractional part as an
+        # integer, so 3.0 (what json.loads yields for "3.0") must pass.
+        return isinstance(data, int) or (isinstance(data, float) and data.is_integer())
+    if expected_type == "number":
+        return isinstance(data, (int, float)) and not isinstance(data, bool)
+    if expected_type == "boolean":
+        return isinstance(data, bool)
+    if expected_type == "null":
+        return data is None
+    return True