feat(ACTIVITY-WP-0016-T03): resilient per-item report recovery with quarantine lane

When the whole-document parse + one retry still fail, report instructions now run _resilient_report before the total-loss path. A brace/quote-aware scanner (_extract_object_spans) recovers each recommendation object whether pretty-printed across many lines or NDJSON one-per-line; a truncated tail gets a best-effort _try_repair; _partition_items validates each recovered object against the T02 item schema. Valid items survive (output_validated=True, partial=True), malformed/ over-maxItems items are quarantined with provenance (index, error, raw, reason), capped at 20. Error locality now matches the unit of work: one bad item costs one item, not the whole report. Verified against the real 06-26 shape: 7 valid recommendations + a truncated tail now recovers all 7 and quarantines the broken tail (previously the whole run was discarded). Happy-path maxItems top-N enforcement is deferred to T04 (count caps). Full suite: 215 passed, 1 skipped. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-26 17:56:28 +02:00
parent b41b6034ee
commit a70c00a789
2 changed files with 305 additions and 0 deletions
--- a/src/activity_core/rules/executor.py
+++ b/src/activity_core/rules/executor.py
@@ -178,6 +178,12 @@ def _execute(
                "error=%s, raw_output_preview=%r",
                instr.id, prompt_hash, error, preview,
            )
+            # Posture B (WP-0016-T03): try to recover a partial-but-usable
+            # report from individually-parseable items before declaring total
+            # loss. One bad item should cost one item, not the whole report.
+            recovered = _resilient_report(instr, raw_output, error, prompt_hash)
+            if recovered is not None:
+                return recovered
            failure_report = _invalid_output_report(instr, error, raw_output)
            if failure_report is not None:
                return InstructionResult(
@@ -279,6 +285,233 @@ def _invalid_output_report(
    return report


+# ---------------------------------------------------------------------------
+# Resilient report recovery (ACTIVITY-WP-0016-T03)
+#
+# Posture B — verify & mitigate at the producer→consumer boundary. When the
+# whole-document parse/validate fails, recover individually-parseable
+# recommendation objects, validate each against the item schema, keep the valid
+# ones, and quarantine the malformed/over-limit ones with provenance. One bad
+# item costs one item, not the whole report (error locality == unit of work).
+# ---------------------------------------------------------------------------
+
+_QUARANTINE_LIMIT = 20
+_SNIPPET_LIMIT = 200
+_SUMMARY_RE = re.compile(r'"summary"\s*:\s*"((?:[^"\\]|\\.)*)"')
+
+
+def _snippet(value: Any) -> str:
+    text = value if isinstance(value, str) else json.dumps(value, default=str)
+    return text[:_SNIPPET_LIMIT]
+
+
+def _report_contract(instr: Any) -> tuple[dict[str, Any] | None, int | None]:
+    """Extract (item_schema, max_items) for the recommendations list, if any."""
+    try:
+        schema = _load_output_schema(getattr(instr, "output_schema", ""))
+    except (OSError, json.JSONDecodeError, TypeError):
+        return None, None
+    if not isinstance(schema, dict):
+        return None, None
+    recs = (schema.get("properties") or {}).get("recommendations")
+    if not isinstance(recs, dict):
+        return None, None
+    item_schema = recs.get("items") if isinstance(recs.get("items"), dict) else None
+    max_items = recs.get("maxItems") if isinstance(recs.get("maxItems"), int) else None
+    return item_schema, max_items
+
+
+def _extract_object_spans(raw: str) -> list[tuple[str, bool]]:
+    """Return (span, complete) for each recommendation object in raw output.
+
+    Scans the `recommendations` array brace-aware and string-aware so it recovers
+    objects whether they are pretty-printed across many lines or emitted one per
+    line (NDJSON). A truncated trailing object is returned with complete=False.
+    """
+    key = raw.find('"recommendations"')
+    start_region = raw.find("[", key) if key >= 0 else -1
+    if start_region < 0:
+        return []
+    spans: list[tuple[str, bool]] = []
+    i, n = start_region + 1, len(raw)
+    while i < n:
+        ch = raw[i]
+        if ch == "]":
+            break
+        if ch != "{":
+            i += 1
+            continue
+        depth, in_str, esc, j = 0, False, False, i
+        closed = False
+        while j < n:
+            c = raw[j]
+            if in_str:
+                if esc:
+                    esc = False
+                elif c == "\\":
+                    esc = True
+                elif c == '"':
+                    in_str = False
+            elif c == '"':
+                in_str = True
+            elif c == "{":
+                depth += 1
+            elif c == "}":
+                depth -= 1
+                if depth == 0:
+                    spans.append((raw[i:j + 1], True))
+                    closed = True
+                    break
+            j += 1
+        if not closed:
+            spans.append((raw[i:], False))  # truncated tail
+            break
+        i = j + 1
+    return spans
+
+
+def _try_repair(span: str) -> str:
+    """Best-effort close of a truncated JSON object: balance quote, braces, brackets."""
+    in_str, esc, depth_c, depth_b = False, False, 0, 0
+    for c in span:
+        if in_str:
+            if esc:
+                esc = False
+            elif c == "\\":
+                esc = True
+            elif c == '"':
+                in_str = False
+        elif c == '"':
+            in_str = True
+        elif c == "{":
+            depth_c += 1
+        elif c == "}":
+            depth_c -= 1
+        elif c == "[":
+            depth_b += 1
+        elif c == "]":
+            depth_b -= 1
+    repaired = span.rstrip().rstrip(",")
+    if in_str:
+        repaired += '"'
+    return repaired + "]" * max(depth_b, 0) + "}" * max(depth_c, 0)
+
+
+def _recover_recommendations(
+    raw: str,
+) -> tuple[str | None, list[dict[str, Any]], list[dict[str, Any]]]:
+    """Recover (summary, items, quarantined) from a failed report payload."""
+    summary_match = _SUMMARY_RE.search(raw)
+    summary = None
+    if summary_match:
+        try:
+            summary = json.loads(f'"{summary_match.group(1)}"')
+        except json.JSONDecodeError:
+            summary = summary_match.group(1)
+    items: list[dict[str, Any]] = []
+    quarantined: list[dict[str, Any]] = []
+    for index, (span, complete) in enumerate(_extract_object_spans(raw)):
+        parsed: Any = None
+        try:
+            parsed = json.loads(span)
+        except json.JSONDecodeError as exc:
+            if not complete:
+                try:
+                    parsed = json.loads(_try_repair(span))
+                except json.JSONDecodeError:
+                    parsed = None
+            if parsed is None:
+                quarantined.append(
+                    {"index": index, "error": str(exc), "raw": _snippet(span),
+                     "reason": "truncated" if not complete else "unparseable"}
+                )
+                continue
+        if isinstance(parsed, dict):
+            items.append(parsed)
+        else:
+            quarantined.append(
+                {"index": index, "error": "item is not a JSON object",
+                 "raw": _snippet(span)}
+            )
+    return summary, items, quarantined
+
+
+def _partition_items(
+    items: list[dict[str, Any]],
+    item_schema: dict[str, Any] | None,
+    max_items: int | None,
+) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+    """Split items into (valid, quarantined): schema-invalid then over-limit."""
+    valid: list[dict[str, Any]] = []
+    quarantined: list[dict[str, Any]] = []
+    for index, item in enumerate(items):
+        error = (
+            _validate_schema_node(item, item_schema, f"recommendations[{index}]")
+            if item_schema
+            else None
+        )
+        if error:
+            quarantined.append({"index": index, "error": error, "raw": _snippet(item)})
+        else:
+            valid.append(item)
+    if max_items is not None and len(valid) > max_items:
+        for item in valid[max_items:]:
+            quarantined.append(
+                {"index": None, "error": f"exceeds maxItems={max_items}",
+                 "raw": _snippet(item), "reason": "over_limit"}
+            )
+        valid = valid[:max_items]
+    return valid, quarantined
+
+
+def _resilient_report(
+    instr: Any,
+    raw_output: Any,
+    original_error: str,
+    prompt_hash: str | None,
+) -> InstructionResult | None:
+    """Recover a partial-but-usable report from output that failed validation.
+
+    Returns None when nothing usable can be recovered, so the caller falls back
+    to the total-loss diagnostic artifact (_invalid_output_report).
+    """
+    if not getattr(instr, "report_sinks", None) or not isinstance(raw_output, str):
+        return None
+    item_schema, max_items = _report_contract(instr)
+    summary, items, quarantined = _recover_recommendations(raw_output)
+    if not items:
+        return None
+    valid, item_quarantine = _partition_items(items, item_schema, max_items)
+    quarantined.extend(item_quarantine)
+    if not valid:
+        return None
+    report: dict[str, Any] = {
+        "summary": summary
+        or f"Partial daily triage: recovered {len(valid)} recommendation(s) "
+        "after the full report failed validation.",
+        "recommendations": valid,
+        "status": "partial",
+        "partial": True,
+        "quarantined_count": len(quarantined),
+        "quarantined_items": quarantined[:_QUARANTINE_LIMIT],
+        "recovery_note": f"original validation error: {original_error}",
+    }
+    logger.warning(
+        "instruction_output_recovered: instruction=%r, kept=%d, quarantined=%d",
+        getattr(instr, "id", None), len(valid), len(quarantined),
+    )
+    return InstructionResult(
+        tasks=[],
+        report=report,
+        prompt_hash=prompt_hash,
+        model=getattr(instr, "model", None),
+        output_validated=True,
+        review_required=True,
+        condition_matched=getattr(instr, "condition", "") or None,
+        validation_error=None,
+    )
+
+
 def _execution_failure_report(instr: Any, error: str) -> dict[str, Any] | None:
    """Build a durable diagnostic report when a report instruction cannot run."""
    if not getattr(instr, "report_sinks", None):
--- a/tests/rules/test_executor.py
+++ b/tests/rules/test_executor.py
@@ -403,6 +403,78 @@ def test_execute_instruction_with_audit_rejects_invalid_report_schema():
    assert llm.call_count == 2


+# ── WP-0016-T03 resilient report recovery ─────────────────────────────────────
+
+def _valid_rec(rank: int) -> dict[str, Any]:
+    return {
+        "rank": rank,
+        "candidate": f"WS-{rank}",
+        "action": "work-next",
+        "why": f"reason {rank}",
+        "wsjf": {"score": 5.0},
+    }
+
+
+def _pretty_triage_with_truncated_tail(num_valid: int) -> str:
+    body = ",\n".join("    " + json.dumps(_valid_rec(i)) for i in range(1, num_valid + 1))
+    # Trailing object is cut off mid-string — the whole document is invalid JSON,
+    # reproducing the 2026-06-26 failure shape (valid prefix, broken tail).
+    return (
+        '{\n  "summary": "Daily triage.",\n  "recommendations": [\n'
+        + body
+        + ',\n    {\n      "rank": '
+        + str(num_valid + 1)
+        + ',\n      "candidate": "WS-X",\n      "action": "work-'
+    )
+
+
+def test_resilient_report_recovers_valid_prefix_and_quarantines_truncated_tail():
+    raw = _pretty_triage_with_truncated_tail(7)
+    llm = _CountingLLM([raw, raw])
+    instr = _instr(
+        id="daily-triage-report",
+        prompt="Report.",
+        trusted_fields=[],
+        output_schema="schemas/daily-triage-report.json",
+        report_sinks=[{"type": "working-memory"}],
+    )
+
+    result = execute_instruction_with_audit(instr, _Event(), {}, llm)
+
+    assert result.output_validated is True
+    assert result.review_required is True
+    assert result.report is not None
+    assert result.report["partial"] is True
+    assert len(result.report["recommendations"]) == 7
+    assert result.report["summary"] == "Daily triage."
+    assert result.report["quarantined_count"] >= 1
+    # The broken tail is dropped — either as an unparseable/truncated span or,
+    # if _try_repair salvages its structure, as a schema-invalid item. Either way
+    # it carries a diagnostic error and never pollutes the surviving report.
+    assert result.report["quarantined_items"][0]["error"]
+
+
+def test_resilient_report_quarantines_one_bad_item_among_valid():
+    recs = [_valid_rec(1), {"candidate": "WS-2", "action": "x", "why": "no rank"}, _valid_rec(3)]
+    raw = json.dumps({"summary": "Triage.", "recommendations": recs})
+    llm = _CountingLLM([raw, raw])
+    instr = _instr(
+        id="daily-triage-report",
+        prompt="Report.",
+        trusted_fields=[],
+        output_schema="schemas/daily-triage-report.json",
+        report_sinks=[{"type": "working-memory"}],
+    )
+
+    result = execute_instruction_with_audit(instr, _Event(), {}, llm)
+
+    assert result.output_validated is True
+    assert result.report["partial"] is True
+    assert len(result.report["recommendations"]) == 2
+    assert result.report["quarantined_count"] == 1
+    assert "rank" in result.report["quarantined_items"][0]["error"]
+
+
 def test_execute_instruction_with_audit_preserves_invalid_report_with_sinks(
    tmp_path,
    monkeypatch,