Wire instruction report execution

2026-05-19 18:28:23 +02:00
parent 0e7084207e
commit 0dc342eb1b
7 changed files with 513 additions and 16 deletions
--- a/tests/test_instruction_evaluation.py
+++ b/tests/test_instruction_evaluation.py
@@ -0,0 +1,116 @@
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from activity_core import activities
+
+
+class FakeLLMClient:
+    """Stand-in LLM client that replays one canned response.
+
+    Every ``complete`` call is appended to ``calls`` as a ``(prompt, model)``
+    tuple so tests can assert on exactly what was sent.
+    """
+
+    def __init__(self, response: str) -> None:
+        # Ordered log of (prompt, model) pairs, one entry per complete() call.
+        self.calls: list[tuple[str, str]] = []
+        self.response = response
+
+    def complete(self, prompt: str, model: str = "") -> str:
+        """Record the call and return the canned response unchanged."""
+        invocation = (prompt, model)
+        self.calls.append(invocation)
+        return self.response
+
+
+@pytest.mark.asyncio
+async def test_evaluate_instructions_returns_task_specs_with_audit(monkeypatch) -> None:
+    """Check task specs built from a JSON-list LLM reply carry audit metadata.
+
+    The instruction has an empty ``output_schema``; the test expects one task
+    spec tagged with its source instruction, model and prompt hash, no reports,
+    and a single LLM call whose prompt has the trusted field substituted.
+    """
+    canned_tasks = [
+        {
+            "title": "Run daily triage",
+            "description": "Review State Hub loose ends.",
+            "priority": "high",
+            "labels": ["triage"],
+        }
+    ]
+    fake_client = FakeLLMClient(json.dumps(canned_tasks))
+    monkeypatch.setattr(activities, "get_llm_client", lambda: fake_client)
+
+    instruction = {
+        "id": "daily-triage",
+        "trusted_fields": ["context.summary.open_tasks"],
+        "model": "test-model",
+        "prompt": "Open tasks: {context.summary.open_tasks}",
+        "output_schema": "",
+        "review_required": False,
+    }
+    payload = {
+        "instructions": [instruction],
+        "event": {},
+        "context": {"summary": {"open_tasks": 3}},
+    }
+    result = await activities.evaluate_instructions(payload)
+
+    produced = result["task_specs"]
+    assert len(produced) == 1
+    spec = produced[0]
+
+    # Plain equality checks, kept in the same order as the expectations read.
+    expected_fields = {
+        "title": "Run daily triage",
+        "source_type": "instruction",
+        "source_id": "daily-triage",
+        "model": "test-model",
+    }
+    for field, expected in expected_fields.items():
+        assert spec[field] == expected
+
+    assert spec["output_validated"] is True
+    assert spec["review_required"] is False
+
+    # The prompt hash must be present and exactly 64 characters long.
+    digest = spec["prompt_hash"]
+    assert digest is not None
+    assert len(digest) == 64
+
+    assert result["reports"] == []
+    # The placeholder must already be rendered when the prompt reaches the client.
+    assert fake_client.calls == [("Open tasks: 3", "test-model")]
+
+
+@pytest.mark.asyncio
+async def test_evaluate_instructions_returns_report_payload(monkeypatch) -> None:
+    """Check a JSON-object LLM reply is surfaced as a report, not as task specs.
+
+    The instruction names an ``output_schema``; the test expects no task specs
+    and one report entry carrying the instruction id, the parsed reply, a
+    validation flag and a prompt hash.
+    """
+    canned_report = {
+        "summary": "State Hub has open loose ends.",
+        "recommendations": [{"candidate": "CUST-WP-0045", "action": "work-next"}],
+    }
+    fake_client = FakeLLMClient(json.dumps(canned_report))
+    monkeypatch.setattr(activities, "get_llm_client", lambda: fake_client)
+
+    instruction = {
+        "id": "daily-triage-report",
+        "trusted_fields": [],
+        "model": "test-model",
+        "prompt": "Run report.",
+        "output_schema": "schemas/daily-triage-report.json",
+        "review_required": False,
+    }
+    payload = {"instructions": [instruction], "event": {}, "context": {}}
+    result = await activities.evaluate_instructions(payload)
+
+    assert result["task_specs"] == []
+
+    reports = result["reports"]
+    assert len(reports) == 1
+    entry = reports[0]
+    assert entry["instruction_id"] == "daily-triage-report"
+    # The parsed LLM reply is nested under the "report" key of the entry.
+    assert entry["report"]["summary"] == "State Hub has open loose ends."
+    assert entry["output_validated"] is True
+    assert entry["prompt_hash"] is not None
+
+
+@pytest.mark.asyncio
+async def test_evaluate_instructions_without_llm_client_returns_no_tasks(monkeypatch) -> None:
+    """Check a failing LLM client yields an empty result instead of an error.
+
+    The patched client raises ``RuntimeError`` from ``complete``; the test
+    expects ``evaluate_instructions`` to return empty task specs and reports.
+    """
+
+    class RaisingClient:
+        """Client whose ``complete`` always fails with ``RuntimeError``."""
+
+        def complete(self, prompt: str, model: str = "") -> str:  # noqa: ARG002
+            raise RuntimeError("not configured")
+
+    monkeypatch.setattr(activities, "get_llm_client", lambda: RaisingClient())
+
+    # Note: this instruction deliberately mirrors the original and has no
+    # "review_required" key.
+    instruction = {
+        "id": "daily-triage",
+        "trusted_fields": [],
+        "model": "test-model",
+        "prompt": "Run triage.",
+        "output_schema": "schemas/daily-triage-report.json",
+    }
+    payload = {"instructions": [instruction], "event": {}, "context": {}}
+    result = await activities.evaluate_instructions(payload)
+
+    empty_outcome = {"task_specs": [], "reports": []}
+    assert result == empty_outcome