generated from coulomb/repo-seed
Harden WSJF triage report recovery
This commit is contained in:
@@ -4,7 +4,8 @@ Covers:
|
||||
- UntrustedFieldError raised when prompt references untrusted field
|
||||
- Object-type attribute rejected even when listed in trusted_fields
|
||||
- Injection fixture: untrusted field raises UntrustedFieldError before rendering
|
||||
- Schema validation: NullLLM returning invalid JSON → retry → second invalid → []
|
||||
- Schema validation: invalid JSON retries once; report-sink instructions preserve
|
||||
a validation-failure artifact after the second invalid output.
|
||||
- review_required flag: present on InstructionDef model
|
||||
"""
|
||||
|
||||
@@ -98,6 +99,7 @@ def _instr(
|
||||
max_tokens: int | None = None,
|
||||
max_depth: int | None = None,
|
||||
model_params: dict[str, Any] | None = None,
|
||||
report_sinks: list[dict[str, Any]] | None = None,
|
||||
) -> SimpleNamespace:
|
||||
return SimpleNamespace(
|
||||
id=id,
|
||||
@@ -111,6 +113,7 @@ def _instr(
|
||||
model_params=model_params or {},
|
||||
output_schema=output_schema,
|
||||
review_required=review_required,
|
||||
report_sinks=report_sinks or [],
|
||||
)
|
||||
|
||||
|
||||
@@ -353,6 +356,58 @@ def test_execute_instruction_with_audit_rejects_invalid_report_schema():
|
||||
assert llm.call_count == 2
|
||||
|
||||
|
||||
def test_execute_instruction_with_audit_preserves_invalid_report_with_sinks(
    tmp_path,
    monkeypatch,
):
    """Schema-invalid report output is preserved when the instruction has report sinks.

    The LLM double hands back the same payload twice; the payload lacks the
    ``action`` key that the schema requires on every recommendation. The
    result must carry no tasks, be flagged for review, expose the validation
    error, and keep the rejected payload inside a ``validation_failed`` report.
    """
    # Each recommendation entry must provide "action"; the payload below does not.
    recommendation_schema = {
        "type": "object",
        "required": ["action"],
    }
    report_schema = {
        "type": "object",
        "required": ["summary", "recommendations"],
        "properties": {
            "summary": {"type": "string"},
            "recommendations": {
                "type": "array",
                "items": recommendation_schema,
            },
        },
    }
    schemas = tmp_path / "schemas"
    schemas.mkdir()
    (schemas / "daily-triage-report.json").write_text(
        json.dumps(report_schema),
        encoding="utf-8",
    )
    # NOTE(review): the instruction names the schema by relative path, so the
    # working directory is presumably what anchors the lookup -- confirm.
    monkeypatch.chdir(tmp_path)

    invalid_report = {
        "summary": "Generated partial triage.",
        "recommendations": [{"rank": 1, "candidate": "CUST-WP-0045"}],
    }
    raw_output = json.dumps(invalid_report)
    llm = _CountingLLM([raw_output, raw_output])
    instr = _instr(
        id="daily-triage-report",
        prompt="Report.",
        trusted_fields=[],
        output_schema="schemas/daily-triage-report.json",
        report_sinks=[{"type": "working-memory", "path": "/tmp"}],
    )

    result = execute_instruction_with_audit(instr, _Event(), {}, llm)

    expected_error = "$.recommendations[0]: missing required property 'action'"
    assert result.tasks == []
    assert result.output_validated is False
    assert result.review_required is True
    assert result.validation_error == expected_error
    assert result.report is not None
    failure_report = result.report
    assert failure_report["status"] == "validation_failed"
    assert failure_report["partial_summary"] == "Generated partial triage."
    assert failure_report["partial_report"] == invalid_report
    # Two calls: the initial attempt plus one more after the first rejection.
    assert llm.call_count == 2
|
||||
|
||||
|
||||
def test_execute_instruction_with_audit_accepts_report_and_tasks_envelope():
|
||||
envelope = {
|
||||
"report": {"summary": "Review needed."},
|
||||
|
||||
@@ -54,3 +54,10 @@ def test_daily_triage_verifier_dry_run_names_all_operator_checks() -> None:
|
||||
assert "where id = '00000000-0000-0000-0000-000000000123'" in timeout_check["sql"]
|
||||
assert timeout_check["activity_timeout_seconds"] == 900
|
||||
assert timeout_check["retry_attempts"] == 10
|
||||
|
||||
|
||||
def test_daily_triage_verifier_default_working_memory_dir() -> None:
    """Parsing an empty argument list yields the default working-memory directory."""
    expected_dir = "/home/worsch/the-custodian/memory/working"

    parsed = _load_script().parse_args([])

    assert parsed.working_memory_dir == expected_dir
|
||||
|
||||
@@ -98,6 +98,63 @@ async def test_evaluate_instructions_returns_report_payload(monkeypatch) -> None
|
||||
assert report["prompt_hash"] is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_evaluate_instructions_returns_invalid_report_for_report_sinks(
    monkeypatch,
    tmp_path,
) -> None:
    """A schema-invalid report is still returned when the instruction has report sinks.

    The fake LLM output omits the ``wsjf`` key that the schema requires on each
    recommendation. The activity must return no task specs and exactly one
    report entry, marked unvalidated and review-required, whose body is a
    ``validation_failed`` artifact carrying the partial summary.
    """
    # Each recommendation entry must provide "wsjf"; the fake output does not.
    recommendation_schema = {
        "type": "object",
        "required": ["wsjf"],
    }
    report_schema = {
        "type": "object",
        "required": ["summary", "recommendations"],
        "properties": {
            "summary": {"type": "string"},
            "recommendations": {
                "type": "array",
                "items": recommendation_schema,
            },
        },
    }
    schemas = tmp_path / "schemas"
    schemas.mkdir()
    schema_file = schemas / "daily-triage-report.json"
    schema_file.write_text(json.dumps(report_schema), encoding="utf-8")
    # NOTE(review): the instruction names the schema by relative path, so the
    # working directory is presumably what anchors the lookup -- confirm.
    monkeypatch.chdir(tmp_path)

    llm_output = json.dumps({
        "summary": "Partial triage.",
        "recommendations": [{"rank": 1, "candidate": "CUST-WP-0045"}],
    })
    llm = FakeLLMClient(llm_output)

    def _fake_get_llm_client():
        return llm

    monkeypatch.setattr(activities, "get_llm_client", _fake_get_llm_client)

    instruction = {
        "id": "daily-triage-report",
        "trusted_fields": [],
        "model": "test-model",
        "prompt": "Run report.",
        "output_schema": "schemas/daily-triage-report.json",
        "review_required": False,
        "report_sinks": [{"type": "working-memory", "path": "/tmp"}],
    }
    result = await activities.evaluate_instructions({
        "instructions": [instruction],
        "event": {},
        "context": {},
    })

    expected_error = "$.recommendations[0]: missing required property 'wsjf'"
    assert result["task_specs"] == []
    assert len(result["reports"]) == 1
    report = result["reports"][0]
    assert report["output_validated"] is False
    # The instruction was submitted with review_required False; the entry reports True.
    assert report["review_required"] is True
    assert report["validation_error"] == expected_error
    assert report["report"]["status"] == "validation_failed"
    assert report["report"]["partial_summary"] == "Partial triage."
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_evaluate_instructions_without_llm_client_returns_no_tasks(monkeypatch) -> None:
|
||||
class RaisingClient:
|
||||
|
||||
@@ -32,6 +32,11 @@ def _payload(sinks: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
"recommendations": [{"candidate": "CUST-WP-0045"}],
|
||||
},
|
||||
"sinks": sinks,
|
||||
"prompt_hash": "abc123",
|
||||
"model": "test-model",
|
||||
"output_validated": True,
|
||||
"review_required": False,
|
||||
"validation_error": None,
|
||||
}
|
||||
],
|
||||
}
|
||||
@@ -54,6 +59,9 @@ def test_working_memory_sink_writes_idempotently(tmp_path) -> None:
|
||||
note = tmp_path / "daily-triage-2026-05-19-12345678.md"
|
||||
text = note.read_text(encoding="utf-8")
|
||||
assert "activity_core_run_id: 12345678-aaaa-bbbb-cccc-123456789abc" in text
|
||||
assert "output_validated: true" in text
|
||||
assert "review_required: false" in text
|
||||
assert "model: test-model" in text
|
||||
assert "State Hub has loose ends." in text
|
||||
|
||||
|
||||
@@ -103,6 +111,8 @@ def test_state_hub_progress_sink_posts(monkeypatch) -> None:
|
||||
assert posts[0]["url"] == "http://state-hub.test/progress/"
|
||||
assert posts[0]["json"]["workstream_id"] == "workstream-1"
|
||||
assert posts[0]["json"]["detail"]["activity_core_run_id"] == payload_run_id()
|
||||
assert posts[0]["json"]["detail"]["output_validated"] is True
|
||||
assert posts[0]["json"]["detail"]["review_required"] is False
|
||||
|
||||
|
||||
def test_state_hub_progress_sink_is_idempotent(monkeypatch) -> None:
|
||||
|
||||
Reference in New Issue
Block a user