generated from coulomb/repo-seed
Wire instruction report execution
This commit is contained in:
@@ -21,6 +21,7 @@ from activity_core.rules.executor import (
|
||||
UntrustedFieldError,
|
||||
_render_prompt,
|
||||
execute_instruction,
|
||||
execute_instruction_with_audit,
|
||||
)
|
||||
|
||||
|
||||
@@ -201,6 +202,82 @@ def test_valid_llm_output_returns_task_spec():
|
||||
assert result[0].source_type == "instruction"
|
||||
|
||||
|
||||
def test_execute_instruction_with_audit_returns_metadata():
    """Audited execution of a task-list reply returns tasks plus audit metadata.

    The fake LLM answers with a JSON array holding a single task.  The audit
    result is expected to carry that task tagged with the instruction id, a
    64-character prompt hash, the instruction's model name, and the
    ``output_validated`` / ``review_required`` flags.
    """
    llm_reply = json.dumps([{"title": "Run triage", "priority": "high"}])
    llm = _CountingLLM([llm_reply])
    instruction = _instr(
        id="daily-triage",
        condition="",
        prompt="Check State Hub.",
        trusted_fields=[],
        model="test-model",
        review_required=True,
    )

    audit = execute_instruction_with_audit(instruction, _Event(), {}, llm)

    # Exactly one task, attributed to the originating instruction.
    assert len(audit.tasks) == 1
    assert audit.tasks[0].source_id == "daily-triage"

    # The prompt hash must be present and 64 characters long.
    digest = audit.prompt_hash
    assert digest is not None
    assert len(digest) == 64

    # Remaining audit metadata mirrors the instruction configuration.
    assert audit.model == "test-model"
    assert audit.output_validated is True
    assert audit.review_required is True
|
||||
|
||||
|
||||
def test_execute_instruction_with_audit_accepts_report_payload():
    """A JSON-object reply for a schema-bound instruction is returned as the report.

    The instruction names ``schemas/daily-triage-report.json`` as its output
    schema and the fake LLM replies with an object containing ``summary`` and
    ``recommendations``.  The result should hold no tasks, expose the object
    unchanged as ``report``, and be flagged as validated.
    """
    expected_report = {
        "summary": "State Hub has loose ends.",
        "recommendations": [{"action": "revisit", "candidate": "CUST-WP-0045"}],
    }
    llm_reply = json.dumps(expected_report)
    llm = _CountingLLM([llm_reply])
    instruction = _instr(
        id="daily-triage-report",
        prompt="Report.",
        trusted_fields=[],
        output_schema="schemas/daily-triage-report.json",
    )

    audit = execute_instruction_with_audit(instruction, _Event(), {}, llm)

    assert audit.tasks == []
    assert audit.report == expected_report
    assert audit.output_validated is True
|
||||
|
||||
|
||||
def test_execute_instruction_with_audit_rejects_invalid_report_schema():
    """A report reply lacking ``recommendations`` is rejected.

    The fake LLM is primed with the same incomplete payload twice.  The
    result must contain neither tasks nor a report, be marked as not
    validated, and the LLM must have been called exactly twice.
    """
    # NOTE(review): the two calls presumably correspond to one retry inside
    # the executor -- confirm against execute_instruction_with_audit.
    incomplete_reply = json.dumps({"summary": "Missing recommendations."})
    llm = _CountingLLM([incomplete_reply, incomplete_reply])
    instruction = _instr(
        id="daily-triage-report",
        prompt="Report.",
        trusted_fields=[],
        output_schema="schemas/daily-triage-report.json",
    )

    audit = execute_instruction_with_audit(instruction, _Event(), {}, llm)

    assert audit.tasks == []
    assert audit.report is None
    assert audit.output_validated is False
    assert llm.call_count == 2
|
||||
|
||||
|
||||
def test_execute_instruction_with_audit_accepts_report_and_tasks_envelope():
    """An envelope reply with both ``report`` and ``tasks`` keys yields both parts.

    The instruction is built without an output schema; the fake LLM replies
    with an object wrapping a report and a one-item task list.  The result
    should expose the inner report and a single task with the given title.
    """
    expected_report = {"summary": "Review needed."}
    llm_reply = json.dumps(
        {
            "report": {"summary": "Review needed."},
            "tasks": [{"title": "Inspect CUST-WP-0045"}],
        }
    )
    llm = _CountingLLM([llm_reply])
    instruction = _instr(id="daily-triage-report", prompt="Report.", trusted_fields=[])

    audit = execute_instruction_with_audit(instruction, _Event(), {}, llm)

    assert audit.report == expected_report
    assert len(audit.tasks) == 1
    assert audit.tasks[0].title == "Inspect CUST-WP-0045"
|
||||
|
||||
|
||||
# ── Condition pre-filter ───────────────────────────────────────────────────────
|
||||
|
||||
def test_condition_false_skips_llm():
|
||||
|
||||
116
tests/test_instruction_evaluation.py
Normal file
116
tests/test_instruction_evaluation.py
Normal file
@@ -0,0 +1,116 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from activity_core import activities
|
||||
|
||||
|
||||
class FakeLLMClient:
    """Test double for an LLM client.

    Returns the same canned ``response`` for every completion request and
    keeps a log of the ``(prompt, model)`` pairs it was called with.
    """

    def __init__(self, response: str) -> None:
        # Log of (prompt, model) pairs, in call order.
        self.calls: list[tuple[str, str]] = []
        # Canned reply handed back by every ``complete`` call.
        self.response = response

    def complete(self, prompt: str, model: str = "") -> str:
        """Record the invocation and return the canned response."""
        invocation = (prompt, model)
        self.calls.append(invocation)
        return self.response
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_evaluate_instructions_returns_task_specs_with_audit(monkeypatch) -> None:
    """``evaluate_instructions`` returns task specs enriched with audit fields.

    ``activities.get_llm_client`` is patched to return a fake client whose
    reply is a JSON array with one task.  The single resulting spec must
    carry the task title, the instruction provenance, and the audit metadata;
    no reports are expected.  The fake must have been called once, with the
    prompt ``"Open tasks: 3"`` and the instruction's model.
    """
    llm_reply = json.dumps([
        {
            "title": "Run daily triage",
            "description": "Review State Hub loose ends.",
            "priority": "high",
            "labels": ["triage"],
        }
    ])
    llm = FakeLLMClient(llm_reply)
    monkeypatch.setattr(activities, "get_llm_client", lambda: llm)

    instruction = {
        "id": "daily-triage",
        "trusted_fields": ["context.summary.open_tasks"],
        "model": "test-model",
        "prompt": "Open tasks: {context.summary.open_tasks}",
        "output_schema": "",
        "review_required": False,
    }
    payload = {
        "instructions": [instruction],
        "event": {},
        "context": {"summary": {"open_tasks": 3}},
    }

    result = await activities.evaluate_instructions(payload)

    task_specs = result["task_specs"]
    assert len(task_specs) == 1
    spec = task_specs[0]

    # Task content and provenance.
    assert spec["title"] == "Run daily triage"
    assert spec["source_type"] == "instruction"
    assert spec["source_id"] == "daily-triage"

    # Audit metadata attached to the spec.
    assert spec["model"] == "test-model"
    assert spec["output_validated"] is True
    assert spec["review_required"] is False
    assert spec["prompt_hash"] is not None
    assert len(spec["prompt_hash"]) == 64

    assert result["reports"] == []
    # The placeholder must have been filled from the context before the call.
    assert llm.calls == [("Open tasks: 3", "test-model")]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_evaluate_instructions_returns_report_payload(monkeypatch) -> None:
    """``evaluate_instructions`` surfaces a report-shaped reply under ``reports``.

    The patched LLM client replies with a JSON object (summary plus
    recommendations) for an instruction that names an output schema.  No task
    specs are expected; the single report entry must reference the
    instruction id, contain the summary, be validated, and carry a prompt
    hash.
    """
    llm_reply = json.dumps({
        "summary": "State Hub has open loose ends.",
        "recommendations": [{"candidate": "CUST-WP-0045", "action": "work-next"}],
    })
    llm = FakeLLMClient(llm_reply)
    monkeypatch.setattr(activities, "get_llm_client", lambda: llm)

    instruction = {
        "id": "daily-triage-report",
        "trusted_fields": [],
        "model": "test-model",
        "prompt": "Run report.",
        "output_schema": "schemas/daily-triage-report.json",
        "review_required": False,
    }
    payload = {
        "instructions": [instruction],
        "event": {},
        "context": {},
    }

    result = await activities.evaluate_instructions(payload)

    assert result["task_specs"] == []
    assert len(result["reports"]) == 1

    entry = result["reports"][0]
    assert entry["instruction_id"] == "daily-triage-report"
    assert entry["report"]["summary"] == "State Hub has open loose ends."
    assert entry["output_validated"] is True
    assert entry["prompt_hash"] is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_evaluate_instructions_without_llm_client_returns_no_tasks(monkeypatch) -> None:
    """A failing LLM client results in empty ``task_specs`` and ``reports``.

    ``activities.get_llm_client`` is patched to hand out a client whose
    ``complete`` always raises ``RuntimeError("not configured")``.  The
    activity is expected to return the empty result rather than propagate
    the error.
    """

    class RaisingClient:
        """Stand-in client that fails on every completion request."""

        def complete(self, prompt: str, model: str = "") -> str:  # noqa: ARG002
            raise RuntimeError("not configured")

    monkeypatch.setattr(activities, "get_llm_client", lambda: RaisingClient())

    instruction = {
        "id": "daily-triage",
        "trusted_fields": [],
        "model": "test-model",
        "prompt": "Run triage.",
        "output_schema": "schemas/daily-triage-report.json",
    }
    payload = {
        "instructions": [instruction],
        "event": {},
        "context": {},
    }

    result = await activities.evaluate_instructions(payload)

    assert result == {"task_specs": [], "reports": []}
|
||||
Reference in New Issue
Block a user