feat(ACTIVITY-WP-0016-T02): strict bounded daily-triage output schema

Replace the accept-anything recommendations.items ({type: object}) with a strict per-item contract (required [rank, candidate, action, why] plus a typed wsjf) and a maxItems: 7 hint. The strict item structure is what lets the T03 boundary parser validate each recommendation independently and quarantine only the malformed ones. maxItems is a producer hint (prompt + llm-connect json_schema + T03 mitigation), NOT a hard reject: a hard maxItems reject would discard a whole 16-item report, which is the blast-radius bug WP-0016 removes. DEPLOY COUPLING: the strict schema is also consumed by the current whole-doc validator, so it must ship together with T03's per-item quarantine parser; until T03 ships, it increases whole-doc hard-fails. The prompt, max_tokens headroom, and NDJSON framing are documented as a runtime-bundle handoff. Update four tests to the strict contract; the forwarded-schema test now reads the live schema file instead of hard-coding it. Full suite: 213 passed, 1 skipped. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-26 17:36:24 +02:00
parent 0e9e18a59a
commit 61f278d643
4 changed files with 93 additions and 15 deletions
--- a/tests/test_instruction_evaluation.py
+++ b/tests/test_instruction_evaluation.py
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import json
+from pathlib import Path

 import pytest

@@ -70,7 +71,14 @@ async def test_evaluate_instructions_returns_task_specs_with_audit(monkeypatch)
 async def test_evaluate_instructions_returns_report_payload(monkeypatch) -> None:
    llm = FakeLLMClient(json.dumps({
        "summary": "State Hub has open loose ends.",
-        "recommendations": [{"candidate": "CUST-WP-0045", "action": "work-next"}],
+        "recommendations": [
+            {
+                "rank": 1,
+                "candidate": "CUST-WP-0045",
+                "action": "work-next",
+                "why": "Open loose ends.",
+            }
+        ],
    }))
    monkeypatch.setattr(activities, "get_llm_client", lambda: llm)

@@ -209,6 +217,12 @@ async def test_evaluate_instructions_forwards_llm_connect_depth_config(monkeypat
        "context": {},
    })

+    # Read the live schema file rather than hard-coding it, so the forwarded
+    # json_schema assertion tracks schemas/daily-triage-report.json as the
+    # contract evolves (ACTIVITY-WP-0016-T02).
+    expected_schema = json.loads(
+        Path("schemas/daily-triage-report.json").read_text(encoding="utf-8")
+    )
    assert llm.calls[0][2] == {
        "model_name": "custodian-triage-balanced",
        "temperature": 0.2,
@@ -216,16 +230,6 @@ async def test_evaluate_instructions_forwards_llm_connect_depth_config(monkeypat
        "max_depth": 2,
        "model_params": {
            "reasoning_effort": "medium",
-            "json_schema": {
-                "type": "object",
-                "required": ["summary", "recommendations"],
-                "properties": {
-                    "summary": {"type": "string"},
-                    "recommendations": {
-                        "type": "array",
-                        "items": {"type": "object"},
-                    },
-                },
-            },
+            "json_schema": expected_schema,
        },
    }