feat(ACTIVITY-WP-0016-T03): resilient per-item report recovery with quarantine lane

When the whole-document parse and its one retry both fail, report instructions now run
_resilient_report before falling back to the total-loss path. A brace/quote-aware scanner
(_extract_object_spans) recovers each recommendation object whether pretty-printed
across many lines or NDJSON one-per-line; a truncated tail gets a best-effort
_try_repair; _partition_items validates each recovered object against the T02 item
schema. Valid items survive (output_validated=True, partial=True), malformed/
over-maxItems items are quarantined with provenance (index, error, raw, reason),
capped at 20. Error locality now matches the unit of work: one bad item costs one
item, not the whole report.

Verified against the real 2026-06-26 failure shape: 7 valid recommendations + a truncated tail
now recovers all 7 and quarantines the broken tail (previously the whole run was
discarded). Happy-path maxItems top-N enforcement is deferred to T04 (count caps).
Full suite: 215 passed, 1 skipped.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-26 17:56:28 +02:00
parent b41b6034ee
commit a70c00a789
2 changed files with 305 additions and 0 deletions

View File

@@ -403,6 +403,78 @@ def test_execute_instruction_with_audit_rejects_invalid_report_schema():
assert llm.call_count == 2
# ── WP-0016-T03 resilient report recovery ─────────────────────────────────────
def _valid_rec(rank: int) -> dict[str, Any]:
    """Build one well-formed recommendation entry for the given ``rank``.

    The ``candidate`` and ``why`` fields are derived from ``rank`` so that
    every generated entry is distinguishable; ``action`` and the ``wsjf``
    score are fixed. The tests in this section use these entries as the
    items that are expected to survive recovery.
    """
    rec: dict[str, Any] = {"rank": rank}
    rec["candidate"] = f"WS-{rank}"
    rec["action"] = "work-next"
    rec["why"] = f"reason {rank}"
    rec["wsjf"] = {"score": 5.0}
    return rec
def _pretty_triage_with_truncated_tail(num_valid: int) -> str:
    """Return a multi-line triage document whose final item is cut off.

    The text opens a JSON object with a ``summary`` and a ``recommendations``
    array, lists ``num_valid`` complete entries produced by ``_valid_rec``
    (ranks ``1..num_valid``), then starts one more entry with rank
    ``num_valid + 1`` and stops in the middle of its ``action`` string.
    """
    header = '{\n "summary": "Daily triage.",\n "recommendations": [\n'
    complete_items = ",\n".join(
        " " + json.dumps(_valid_rec(rank)) for rank in range(1, num_valid + 1)
    )
    # The last object ends inside an unterminated string, so the document as a
    # whole is not valid JSON: a good prefix followed by a broken tail, the
    # shape of the 2026-06-26 failure.
    fragments = [
        header,
        complete_items,
        ',\n {\n "rank": ',
        str(num_valid + 1),
        ',\n "candidate": "WS-X",\n "action": "work-',
    ]
    return "".join(fragments)
def test_resilient_report_recovers_valid_prefix_and_quarantines_truncated_tail():
    """Seven complete items followed by a cut-off eighth: the seven are kept.

    The report must come back validated but flagged partial and
    review-required, keep its summary, and carry at least one quarantined
    entry with a non-empty error.
    """
    truncated_doc = _pretty_triage_with_truncated_tail(7)
    # The stub is loaded with the same broken payload twice.
    stub_llm = _CountingLLM([truncated_doc] * 2)
    instruction = _instr(
        id="daily-triage-report",
        prompt="Report.",
        trusted_fields=[],
        output_schema="schemas/daily-triage-report.json",
        report_sinks=[{"type": "working-memory"}],
    )

    outcome = execute_instruction_with_audit(instruction, _Event(), {}, stub_llm)

    assert outcome.output_validated is True
    assert outcome.review_required is True
    report = outcome.report
    assert report is not None
    assert report["partial"] is True
    assert len(report["recommendations"]) == 7
    assert report["summary"] == "Daily triage."
    assert report["quarantined_count"] >= 1
    # How the tail is rejected is deliberately left open: it may be dropped as
    # an unparseable/truncated span, or repaired structurally by _try_repair
    # and then rejected as schema-invalid. In both cases the first quarantined
    # entry must carry a diagnostic error and stay out of the surviving items.
    first_quarantined = report["quarantined_items"][0]
    assert first_quarantined["error"]
def test_resilient_report_quarantines_one_bad_item_among_valid():
    """One item without ``rank`` between two valid ones costs only that item.

    The document itself is well-formed JSON; only the middle recommendation
    is missing its ``rank``. The two valid items must survive in a report
    flagged partial, and exactly one item must be quarantined with an error
    that mentions ``rank``.
    """
    recs = [
        _valid_rec(1),
        # Deliberately has no "rank" key.
        {"candidate": "WS-2", "action": "x", "why": "no rank"},
        _valid_rec(3),
    ]
    raw = json.dumps({"summary": "Triage.", "recommendations": recs})
    # The stub is loaded with the same payload twice.
    llm = _CountingLLM([raw, raw])
    instr = _instr(
        id="daily-triage-report",
        prompt="Report.",
        trusted_fields=[],
        output_schema="schemas/daily-triage-report.json",
        report_sinks=[{"type": "working-memory"}],
    )
    result = execute_instruction_with_audit(instr, _Event(), {}, llm)
    assert result.output_validated is True
    # Guard before subscripting, as the truncated-tail test does: a missing
    # report then fails as a clear assertion rather than a TypeError.
    assert result.report is not None
    assert result.report["partial"] is True
    assert len(result.report["recommendations"]) == 2
    assert result.report["quarantined_count"] == 1
    assert "rank" in result.report["quarantined_items"][0]["error"]
def test_execute_instruction_with_audit_preserves_invalid_report_with_sinks(
tmp_path,
monkeypatch,