Wire instruction report execution

2026-05-19 18:28:23 +02:00
parent 0e7084207e
commit 0dc342eb1b
7 changed files with 513 additions and 16 deletions
--- a/tests/rules/test_executor.py
+++ b/tests/rules/test_executor.py
@@ -21,6 +21,7 @@ from activity_core.rules.executor import (
    UntrustedFieldError,
    _render_prompt,
    execute_instruction,
+    execute_instruction_with_audit,
 )


@@ -201,6 +202,82 @@ def test_valid_llm_output_returns_task_spec():
    assert result[0].source_type == "instruction"


+def test_execute_instruction_with_audit_returns_metadata():  # task-list reply -> tasks plus audit metadata on the result
+    task_data = [{"title": "Run triage", "priority": "high"}]
+    llm = _CountingLLM([json.dumps(task_data)])  # one canned reply: the JSON-encoded task list
+    instr = _instr(
+        id="daily-triage",
+        condition="",
+        prompt="Check State Hub.",
+        trusted_fields=[],
+        model="test-model",
+        review_required=True,
+    )
+
+    result = execute_instruction_with_audit(instr, _Event(), {}, llm)
+
+    assert len(result.tasks) == 1
+    assert result.tasks[0].source_id == "daily-triage"  # the instruction id given to _instr
+    assert result.prompt_hash is not None
+    assert len(result.prompt_hash) == 64  # presumably a SHA-256 hex digest -- TODO confirm in executor
+    assert result.model == "test-model"  # same value passed to _instr
+    assert result.output_validated is True
+    assert result.review_required is True  # same value passed to _instr
+
+
+def test_execute_instruction_with_audit_accepts_report_payload():  # dict-shaped reply with output_schema set -> result.report, no tasks
+    report_data = {
+        "summary": "State Hub has loose ends.",
+        "recommendations": [{"action": "revisit", "candidate": "CUST-WP-0045"}],
+    }
+    llm = _CountingLLM([json.dumps(report_data)])  # one canned reply: the JSON-encoded report object
+    instr = _instr(
+        id="daily-triage-report",
+        prompt="Report.",
+        trusted_fields=[],
+        output_schema="schemas/daily-triage-report.json",
+    )
+
+    result = execute_instruction_with_audit(instr, _Event(), {}, llm)
+
+    assert result.tasks == []  # a report-only reply yields no task specs
+    assert result.report == report_data  # report equals the decoded reply
+    assert result.output_validated is True
+
+
+def test_execute_instruction_with_audit_rejects_invalid_report_schema():  # schema-invalid report -> no report, output_validated False
+    report_data = {"summary": "Missing recommendations."}  # presumably the schema requires "recommendations" -- TODO confirm against the schema file
+    llm = _CountingLLM([json.dumps(report_data), json.dumps(report_data)])  # the same payload is queued twice
+    instr = _instr(
+        id="daily-triage-report",
+        prompt="Report.",
+        trusted_fields=[],
+        output_schema="schemas/daily-triage-report.json",
+    )
+
+    result = execute_instruction_with_audit(instr, _Event(), {}, llm)
+
+    assert result.tasks == []
+    assert result.report is None  # the rejected payload is not exposed as a report
+    assert result.output_validated is False
+    assert llm.call_count == 2  # both queued replies were requested; presumably one retry after the failed validation -- TODO confirm in executor
+
+
+def test_execute_instruction_with_audit_accepts_report_and_tasks_envelope():  # reply with both "report" and "tasks" keys populates both result fields
+    envelope = {
+        "report": {"summary": "Review needed."},
+        "tasks": [{"title": "Inspect CUST-WP-0045"}],
+    }
+    llm = _CountingLLM([json.dumps(envelope)])  # one canned reply: the JSON-encoded envelope
+    instr = _instr(id="daily-triage-report", prompt="Report.", trusted_fields=[])  # no output_schema passed in this case
+
+    result = execute_instruction_with_audit(instr, _Event(), {}, llm)
+
+    assert result.report == {"summary": "Review needed."}  # the value under the "report" key
+    assert len(result.tasks) == 1  # one entry under the "tasks" key
+    assert result.tasks[0].title == "Inspect CUST-WP-0045"
+
+
 # ── Condition pre-filter ───────────────────────────────────────────────────────

 def test_condition_false_skips_llm():
--- a/tests/test_instruction_evaluation.py
+++ b/tests/test_instruction_evaluation.py
@@ -0,0 +1,116 @@
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from activity_core import activities
+
+
+class FakeLLMClient:  # test double: returns one fixed response and records every call
+    def __init__(self, response: str) -> None:
+        self.response = response
+        self.calls: list[tuple[str, str]] = []  # (prompt, model) pairs, in call order
+
+    def complete(self, prompt: str, model: str = "") -> str:
+        self.calls.append((prompt, model))
+        return self.response  # same reply regardless of prompt or model
+
+
+@pytest.mark.asyncio
+async def test_evaluate_instructions_returns_task_specs_with_audit(monkeypatch) -> None:  # task-list reply -> task specs carrying audit fields, no reports
+    llm = FakeLLMClient(json.dumps([
+        {
+            "title": "Run daily triage",
+            "description": "Review State Hub loose ends.",
+            "priority": "high",
+            "labels": ["triage"],
+        }
+    ]))
+    monkeypatch.setattr(activities, "get_llm_client", lambda: llm)  # make the activity obtain the fake client
+
+    result = await activities.evaluate_instructions({
+        "instructions": [
+            {
+                "id": "daily-triage",
+                "trusted_fields": ["context.summary.open_tasks"],
+                "model": "test-model",
+                "prompt": "Open tasks: {context.summary.open_tasks}",
+                "output_schema": "",
+                "review_required": False,
+            }
+        ],
+        "event": {},
+        "context": {"summary": {"open_tasks": 3}},
+    })
+
+    task_specs = result["task_specs"]
+    assert len(task_specs) == 1
+    spec = task_specs[0]  # specs are plain dicts here, accessed by key
+    assert spec["title"] == "Run daily triage"
+    assert spec["source_type"] == "instruction"
+    assert spec["source_id"] == "daily-triage"  # the instruction id from the payload
+    assert spec["model"] == "test-model"
+    assert spec["output_validated"] is True
+    assert spec["review_required"] is False
+    assert spec["prompt_hash"] is not None
+    assert len(spec["prompt_hash"]) == 64  # presumably a SHA-256 hex digest -- TODO confirm in executor
+    assert result["reports"] == []
+    assert llm.calls == [("Open tasks: 3", "test-model")]  # exactly one call, with the placeholder replaced by the context value 3
+
+
+@pytest.mark.asyncio
+async def test_evaluate_instructions_returns_report_payload(monkeypatch) -> None:  # dict-shaped reply -> one entry in "reports", no task specs
+    llm = FakeLLMClient(json.dumps({
+        "summary": "State Hub has open loose ends.",
+        "recommendations": [{"candidate": "CUST-WP-0045", "action": "work-next"}],
+    }))
+    monkeypatch.setattr(activities, "get_llm_client", lambda: llm)  # make the activity obtain the fake client
+
+    result = await activities.evaluate_instructions({
+        "instructions": [
+            {
+                "id": "daily-triage-report",
+                "trusted_fields": [],
+                "model": "test-model",
+                "prompt": "Run report.",
+                "output_schema": "schemas/daily-triage-report.json",
+                "review_required": False,
+            }
+        ],
+        "event": {},
+        "context": {},
+    })
+
+    assert result["task_specs"] == []
+    assert len(result["reports"]) == 1
+    report = result["reports"][0]  # each entry wraps the payload under "report" alongside audit fields
+    assert report["instruction_id"] == "daily-triage-report"  # the instruction id from the payload
+    assert report["report"]["summary"] == "State Hub has open loose ends."
+    assert report["output_validated"] is True
+    assert report["prompt_hash"] is not None
+
+
+@pytest.mark.asyncio
+async def test_evaluate_instructions_without_llm_client_returns_no_tasks(monkeypatch) -> None:  # failing client -> empty result instead of an exception
+    class RaisingClient:  # stand-in for an unconfigured LLM: complete() always raises
+        def complete(self, prompt: str, model: str = "") -> str:  # noqa: ARG002
+            raise RuntimeError("not configured")
+
+    monkeypatch.setattr(activities, "get_llm_client", lambda: RaisingClient())
+
+    result = await activities.evaluate_instructions({
+        "instructions": [
+            {
+                "id": "daily-triage",
+                "trusted_fields": [],
+                "model": "test-model",
+                "prompt": "Run triage.",
+                "output_schema": "schemas/daily-triage-report.json",
+            }
+        ],
+        "event": {},
+        "context": {},
+    })
+
+    assert result == {"task_specs": [], "reports": []}  # the RuntimeError is not propagated to the caller; both lists come back empty