Harden WSJF triage report recovery

2026-06-05 19:27:03 +02:00
parent 20d4f26166
commit 42e373aba1
10 changed files with 223 additions and 8 deletions
--- a/src/activity_core/activities.py
+++ b/src/activity_core/activities.py
@@ -328,6 +328,7 @@ async def evaluate_instructions(payload: dict) -> dict:
                "model": result.model,
                "output_validated": result.output_validated,
                "review_required": result.review_required,
+                "validation_error": result.validation_error,
            })
        for spec in result.tasks:
            task_specs.append({
--- a/src/activity_core/report_sinks.py
+++ b/src/activity_core/report_sinks.py
@@ -126,6 +126,9 @@ def _post_state_hub_progress(
            "activity_core_run_id": run_id,
            "instruction_id": instruction_id,
            "scheduled_for": payload.get("scheduled_for"),
+            "output_validated": report_entry.get("output_validated"),
+            "review_required": report_entry.get("review_required"),
+            "validation_error": report_entry.get("validation_error"),
            "report": report,
        },
    }
@@ -179,6 +182,7 @@ def _render_markdown(
    report = report_entry.get("report") or {}
    instruction_id = report_entry.get("instruction_id", "instruction")
    summary = report.get("summary", "")
+    validation_error = report_entry.get("validation_error")
    lines = [
        "---",
        "type: working-memory",
@@ -187,6 +191,10 @@ def _render_markdown(
        f"activity_core_run_id: {payload.get('run_id')}",
        f"instruction_id: {instruction_id}",
        f"scheduled_for: {payload.get('scheduled_for')}",
+        f"output_validated: {str(bool(report_entry.get('output_validated'))).lower()}",
+        f"review_required: {str(bool(report_entry.get('review_required'))).lower()}",
+        f"model: {report_entry.get('model') or ''}",
+        f"prompt_hash: {report_entry.get('prompt_hash') or ''}",
        f"created: {datetime.now(tz=timezone.utc).isoformat()}",
        "---",
        "",
@@ -195,6 +203,8 @@ def _render_markdown(
    ]
    if summary:
        lines.extend([summary, ""])
+    if validation_error:
+        lines.extend(["Validation error:", "", f"`{validation_error}`", ""])
    lines.extend([
        "```json",
        json.dumps(report, indent=2, sort_keys=True),
--- a/src/activity_core/rules/executor.py
+++ b/src/activity_core/rules/executor.py
@@ -39,6 +39,7 @@ class InstructionResult:
    output_validated: bool = False
    review_required: bool = False
    condition_matched: str | None = None
+    validation_error: str | None = None


 def _resolve_path(obj: Any, path: str) -> Any:
@@ -164,7 +165,19 @@ def _execute(
                "error=%s, raw_output_preview=%r",
                instr.id, prompt_hash, error, preview,
            )
-            return _empty_result(instr, prompt_hash=prompt_hash)
+            failure_report = _invalid_output_report(instr, error, raw_output)
+            if failure_report is not None:
+                return InstructionResult(
+                    tasks=[],
+                    report=failure_report,
+                    prompt_hash=prompt_hash,
+                    model=instr.model,
+                    output_validated=False,
+                    review_required=True,
+                    condition_matched=instr.condition or None,
+                    validation_error=error,
+                )
+            return _empty_result(instr, prompt_hash=prompt_hash, validation_error=error)

    return InstructionResult(
        tasks=task_specs,
@@ -193,7 +206,11 @@ def _llm_run_config(instr: Any) -> dict[str, Any]:
    return config


-def _empty_result(instr: Any, prompt_hash: str | None = None) -> InstructionResult:
+def _empty_result(
+    instr: Any,
+    prompt_hash: str | None = None,
+    validation_error: str | None = None,
+) -> InstructionResult:
    return InstructionResult(
        tasks=[],
        prompt_hash=prompt_hash,
@@ -201,9 +218,54 @@ def _empty_result(instr: Any, prompt_hash: str | None = None) -> InstructionResu
        output_validated=False,
        review_required=bool(getattr(instr, "review_required", False)),
        condition_matched=getattr(instr, "condition", "") or None,
+        validation_error=validation_error,
    )


+def _invalid_output_report(
+    instr: Any,
+    validation_error: str,
+    raw_output: Any,
+) -> dict[str, Any] | None:
+    """Build a diagnostic report for output that failed validation.
+
+    Returns ``None`` when ``instr`` has no ``report_sinks``. Otherwise returns
+    a ``validation_failed`` report with what could be recovered from
+    ``raw_output``: a dict or list (given directly or decoded from a JSON
+    string) goes whole under ``partial_report``; any other string, including
+    JSON decoding to a scalar, becomes a 4000-char ``raw_output_preview``.
+    """
+    if not getattr(instr, "report_sinks", None):
+        return None
+
+    partial_output: Any = raw_output
+    raw_preview: str | None = None
+    if isinstance(raw_output, str):
+        raw_preview = raw_output[:4000]  # used only if no dict/list is recovered
+        try:
+            partial_output = json.loads(raw_output)
+        except json.JSONDecodeError:
+            partial_output = None
+
+    report: dict[str, Any] = {
+        "summary": (
+            f"Instruction {instr.id} produced output that failed validation; "
+            "partial output was preserved for operator review."
+        ),
+        "status": "validation_failed",
+        "validation_error": validation_error,
+    }
+    if isinstance(partial_output, dict):
+        if isinstance(partial_output.get("summary"), str):
+            report["partial_summary"] = partial_output["summary"]
+        report["partial_report"] = partial_output
+    elif isinstance(partial_output, list):
+        report["partial_report"] = partial_output
+    elif raw_preview is not None:
+        report["raw_output_preview"] = raw_preview
+    return report
+
+
 def _validate_output(
    raw_output: Any,
    instr: Any,