generated from coulomb/repo-seed
feat(ACTIVITY-WP-0016-T03): resilient per-item report recovery with quarantine lane
When the whole-document parse and its one retry both fail, report instructions now run _resilient_report before the total-loss path. A brace/quote-aware scanner (_extract_object_spans) recovers each recommendation object whether pretty-printed across many lines or NDJSON one-per-line; a truncated tail gets a best-effort _try_repair; _partition_items validates each recovered object against the T02 item schema. Valid items survive (output_validated=True, partial=True); malformed or over-maxItems items are quarantined with provenance (index, error, raw, reason), capped at 20. Error locality now matches the unit of work: one bad item costs one item, not the whole report. Verified against the real 06-26 shape: 7 valid recommendations + a truncated tail now recovers all 7 and quarantines the broken tail (previously the whole run was discarded). Happy-path maxItems top-N enforcement is deferred to T04 (count caps). Full suite: 215 passed, 1 skipped. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -403,6 +403,78 @@ def test_execute_instruction_with_audit_rejects_invalid_report_schema():
|
||||
assert llm.call_count == 2
|
||||
|
||||
|
||||
# ── WP-0016-T03 resilient report recovery ─────────────────────────────────────
|
||||
|
||||
def _valid_rec(rank: int) -> dict[str, Any]:
|
||||
return {
|
||||
"rank": rank,
|
||||
"candidate": f"WS-{rank}",
|
||||
"action": "work-next",
|
||||
"why": f"reason {rank}",
|
||||
"wsjf": {"score": 5.0},
|
||||
}
|
||||
|
||||
|
||||
def _pretty_triage_with_truncated_tail(num_valid: int) -> str:
    """Render a multi-line triage document whose last item is cut off.

    The text starts like a JSON object with a ``summary`` and a
    ``recommendations`` array, lists ``num_valid`` complete items produced by
    ``_valid_rec``, and then begins one more item that stops in the middle of
    a string value. The document as a whole is therefore not valid JSON; this
    reproduces the 2026-06-26 failure shape (valid prefix, broken tail).

    Args:
        num_valid: How many complete recommendation objects precede the
            truncated one.

    Returns:
        The deliberately truncated document text.
    """
    complete_items = [
        " " + json.dumps(_valid_rec(rank)) for rank in range(1, num_valid + 1)
    ]
    pieces = [
        '{\n "summary": "Daily triage.",\n "recommendations": [\n',
        ",\n".join(complete_items),
        # Everything from here on is the item that never gets closed.
        ',\n {\n "rank": ',
        str(num_valid + 1),
        ',\n "candidate": "WS-X",\n "action": "work-',
    ]
    return "".join(pieces)
|
||||
|
||||
|
||||
def test_resilient_report_recovers_valid_prefix_and_quarantines_truncated_tail():
    """Seven complete items followed by a truncated one yield a partial report.

    The stub LLM is handed the same broken payload twice (presumably one per
    attempt -- confirm against ``_CountingLLM``), so any recovery has to come
    from per-item salvage rather than from a clean retry.
    """
    broken_payload = _pretty_triage_with_truncated_tail(7)
    stub_llm = _CountingLLM([broken_payload, broken_payload])
    instruction = _instr(
        id="daily-triage-report",
        prompt="Report.",
        trusted_fields=[],
        output_schema="schemas/daily-triage-report.json",
        report_sinks=[{"type": "working-memory"}],
    )

    result = execute_instruction_with_audit(instruction, _Event(), {}, stub_llm)

    assert result.output_validated is True
    assert result.review_required is True
    assert result.report is not None

    report = result.report
    assert report["partial"] is True
    assert len(report["recommendations"]) == 7
    assert report["summary"] == "Daily triage."
    assert report["quarantined_count"] >= 1
    # The broken tail must not survive. It may be quarantined as an
    # unparseable/truncated span, or -- if _try_repair manages to close it --
    # as a schema-invalid item. In both cases the first quarantine entry has
    # a non-empty diagnostic error.
    first_quarantined = report["quarantined_items"][0]
    assert first_quarantined["error"]
|
||||
|
||||
|
||||
def test_resilient_report_quarantines_one_bad_item_among_valid():
    """A single item missing ``rank`` is quarantined; its neighbours survive.

    The payload is well-formed JSON, but the middle recommendation has no
    ``rank`` key. The test expects the two complete items to be kept and the
    incomplete one to be reported with an error message that mentions
    ``rank``.
    """
    missing_rank_item = {"candidate": "WS-2", "action": "x", "why": "no rank"}
    payload = json.dumps(
        {
            "summary": "Triage.",
            "recommendations": [_valid_rec(1), missing_rank_item, _valid_rec(3)],
        }
    )
    stub_llm = _CountingLLM([payload, payload])
    instruction = _instr(
        id="daily-triage-report",
        prompt="Report.",
        trusted_fields=[],
        output_schema="schemas/daily-triage-report.json",
        report_sinks=[{"type": "working-memory"}],
    )

    result = execute_instruction_with_audit(instruction, _Event(), {}, stub_llm)

    assert result.output_validated is True

    report = result.report
    assert report["partial"] is True
    assert len(report["recommendations"]) == 2
    assert report["quarantined_count"] == 1
    # The diagnostic should name the field that was missing.
    quarantine_error = report["quarantined_items"][0]["error"]
    assert "rank" in quarantine_error
|
||||
|
||||
|
||||
def test_execute_instruction_with_audit_preserves_invalid_report_with_sinks(
|
||||
tmp_path,
|
||||
monkeypatch,
|
||||
|
||||
Reference in New Issue
Block a user