generated from coulomb/repo-seed
feat(ACTIVITY-WP-0016-T04): producer trust-boundary guardrails + ADR-004
Add ADR-004 documenting the producer trust boundary: untrusted producers (LLM, agent, human; erroneous and malicious), the trust-but-handle vs verify-and-mitigate postures, error-locality and quarantine-with-provenance principles, and the concrete activity-core mechanisms. Implement producer-agnostic guardrails in executor.py, applied uniformly on the happy path and the recovery path via _partition_items: structural-type -> schema -> structural caps (_MAX_DEPTH, _MAX_STRING_LEN) -> reference allow-list -> count cap. Each quarantine carries a reason. Closes the happy-path maxItems count cap deferred from T03 (valid 9-item report keeps 7, quarantines 2). Reference allow-list reads context["known_candidates"] via _allow_list_from_context; inert until a resolver populates it. SCOPE.md updated (executor bullet + ADR list); no INTENT drift. New tests: happy-path count cap, oversized-string guardrail, allow-list rejection. Full suite: 218 passed, 1 skipped. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -475,6 +475,62 @@ def test_resilient_report_quarantines_one_bad_item_among_valid():
|
||||
assert "rank" in result.report["quarantined_items"][0]["error"]
|
||||
|
||||
|
||||
# ── WP-0016-T04 producer guardrails ───────────────────────────────────────────
|
||||
|
||||
def _triage_instr() -> SimpleNamespace:
    """Build the daily-triage-report instruction used by the guardrail tests.

    The instruction is produced by ``_instr`` with no trusted fields, the
    ``schemas/daily-triage-report.json`` output schema, and a single
    working-memory report sink.
    """
    instr_kwargs = {
        "id": "daily-triage-report",
        "prompt": "Report.",
        "trusted_fields": [],
        "output_schema": "schemas/daily-triage-report.json",
        "report_sinks": [{"type": "working-memory"}],
    }
    return _instr(**instr_kwargs)
|
||||
|
||||
|
||||
def test_guardrail_count_cap_on_valid_happy_path():
    """A valid 9-item report keeps 7 recommendations and quarantines 2."""
    # All nine recommendations are individually valid and the document is
    # syntactically valid, so schema validation passes; the maxItems=7 count
    # cap is the only thing that should trim the list.
    payload = {
        "summary": "Triage.",
        "recommendations": [_valid_rec(n) for n in range(1, 10)],
    }
    llm = _CountingLLM([json.dumps(payload)])

    outcome = execute_instruction_with_audit(_triage_instr(), _Event(), {}, llm)

    assert llm.call_count == 1  # no retry — the document was valid
    report = outcome.report
    assert report["partial"] is True
    assert len(report["recommendations"]) == 7
    assert report["quarantined_count"] == 2
    # Every quarantined entry must be attributed to the count cap.
    assert not any(
        entry["reason"] != "over_limit" for entry in report["quarantined_items"]
    )
|
||||
|
||||
|
||||
def test_guardrail_oversized_string_quarantined():
    """A recommendation with a 5000-char ``why`` is quarantined as 'guardrail'."""
    oversized = _valid_rec(2)
    oversized["why"] = "x" * 5000  # exceeds _MAX_STRING_LEN
    document = {
        "summary": "Triage.",
        "recommendations": [_valid_rec(1), oversized],
    }
    llm = _CountingLLM([json.dumps(document)])

    outcome = execute_instruction_with_audit(_triage_instr(), _Event(), {}, llm)

    report = outcome.report
    # Only the normal-sized recommendation survives.
    assert len(report["recommendations"]) == 1
    assert report["quarantined_count"] == 1
    first_quarantined = report["quarantined_items"][0]
    assert first_quarantined["reason"] == "guardrail"
|
||||
|
||||
|
||||
def test_guardrail_allow_list_rejects_unknown_candidate():
    """With only WS-1 in ``known_candidates``, the WS-2 item is quarantined."""
    # The two recommendations carry candidates WS-1 and WS-2.
    document = {
        "summary": "Triage.",
        "recommendations": [_valid_rec(1), _valid_rec(2)],
    }
    llm = _CountingLLM([json.dumps(document)])
    context = {"known_candidates": ["WS-1"]}

    outcome = execute_instruction_with_audit(_triage_instr(), _Event(), context, llm)

    report = outcome.report
    kept = report["recommendations"]
    assert len(kept) == 1
    assert kept[0]["candidate"] == "WS-1"
    first_quarantined = report["quarantined_items"][0]
    assert first_quarantined["reason"] == "allow_list"
|
||||
|
||||
|
||||
def test_execute_instruction_with_audit_preserves_invalid_report_with_sinks(
|
||||
tmp_path,
|
||||
monkeypatch,
|
||||
|
||||
Reference in New Issue
Block a user