# File: activity-core/tests/test_instruction_evaluation.py
# Viewer metadata: 232 lines, 7.3 KiB, Python

from __future__ import annotations
import json
import pytest
from activity_core import activities
class FakeLLMClient:
    """Test double for an LLM client that always answers with one canned string.

    Each ``complete`` invocation is appended to ``calls`` as a
    ``(prompt, model, config)`` tuple so tests can assert on what was sent.
    """

    def __init__(self, response: str) -> None:
        # Recorded invocations, oldest first.
        self.calls: list[tuple[str, str, dict | None]] = []
        self.response = response

    def complete(self, prompt: str, model: str = "", config: dict | None = None) -> str:
        """Record the arguments of this call and return the canned response."""
        recorded = (prompt, model, config)
        self.calls.append(recorded)
        return self.response
@pytest.mark.asyncio
async def test_evaluate_instructions_returns_task_specs_with_audit(monkeypatch) -> None:
    """Check that a JSON-list LLM answer becomes one task spec with audit fields.

    The instruction has an empty ``output_schema``; the test expects a single
    task spec, no reports, and exactly one LLM call with the rendered prompt.
    """
    canned_tasks = [
        {
            "title": "Run daily triage",
            "description": "Review State Hub loose ends.",
            "priority": "high",
            "labels": ["triage"],
        }
    ]
    fake_llm = FakeLLMClient(json.dumps(canned_tasks))
    monkeypatch.setattr(activities, "get_llm_client", lambda: fake_llm)

    instruction = {
        "id": "daily-triage",
        "trusted_fields": ["context.summary.open_tasks"],
        "model": "test-model",
        "prompt": "Open tasks: {context.summary.open_tasks}",
        "output_schema": "",
        "review_required": False,
    }
    payload = {
        "instructions": [instruction],
        "event": {},
        "context": {"summary": {"open_tasks": 3}},
    }

    result = await activities.evaluate_instructions(payload)

    specs = result["task_specs"]
    assert len(specs) == 1
    spec = specs[0]

    # Fields copied from the LLM answer and the originating instruction.
    expected_values = {
        "title": "Run daily triage",
        "source_type": "instruction",
        "source_id": "daily-triage",
        "model": "test-model",
    }
    for field_name, expected in expected_values.items():
        assert spec[field_name] == expected

    # Audit flags must be real booleans, hence the identity checks.
    assert spec["output_validated"] is True
    assert spec["review_required"] is False

    digest = spec["prompt_hash"]
    assert digest is not None
    assert len(digest) == 64

    assert result["reports"] == []

    expected_call = ("Open tasks: 3", "test-model", {"model_name": "test-model"})
    assert fake_llm.calls == [expected_call]
@pytest.mark.asyncio
async def test_evaluate_instructions_returns_report_payload(monkeypatch) -> None:
    """Check that a JSON-object LLM answer is returned as a report, not a task.

    The instruction names an ``output_schema``; the test expects no task specs
    and exactly one validated report carrying the LLM's summary.
    """
    canned_report = {
        "summary": "State Hub has open loose ends.",
        "recommendations": [{"candidate": "CUST-WP-0045", "action": "work-next"}],
    }
    fake_llm = FakeLLMClient(json.dumps(canned_report))
    monkeypatch.setattr(activities, "get_llm_client", lambda: fake_llm)

    instruction = {
        "id": "daily-triage-report",
        "trusted_fields": [],
        "model": "test-model",
        "prompt": "Run report.",
        "output_schema": "schemas/daily-triage-report.json",
        "review_required": False,
    }
    payload = {"instructions": [instruction], "event": {}, "context": {}}

    result = await activities.evaluate_instructions(payload)

    assert result["task_specs"] == []

    reports = result["reports"]
    assert len(reports) == 1
    entry = reports[0]
    assert entry["instruction_id"] == "daily-triage-report"
    assert entry["report"]["summary"] == "State Hub has open loose ends."
    assert entry["output_validated"] is True
    assert entry["prompt_hash"] is not None
@pytest.mark.asyncio
async def test_evaluate_instructions_returns_invalid_report_for_report_sinks(
    monkeypatch,
    tmp_path,
) -> None:
    """Check that a schema-violating answer still yields a flagged report.

    A schema requiring ``wsjf`` on every recommendation is written under
    ``tmp_path`` and the working directory is switched there so the relative
    ``output_schema`` path points at it. The LLM answer omits ``wsjf``; the
    test expects one report marked unvalidated, needing review, and carrying
    the validation error plus the partial summary.
    """
    schema = {
        "type": "object",
        "required": ["summary", "recommendations"],
        "properties": {
            "summary": {"type": "string"},
            "recommendations": {
                "type": "array",
                "items": {
                    "type": "object",
                    "required": ["wsjf"],
                },
            },
        },
    }
    schema_dir = tmp_path / "schemas"
    schema_dir.mkdir()
    schema_file = schema_dir / "daily-triage-report.json"
    schema_file.write_text(json.dumps(schema), encoding="utf-8")
    monkeypatch.chdir(tmp_path)

    # The recommendation deliberately lacks the required "wsjf" property.
    canned_report = {
        "summary": "Partial triage.",
        "recommendations": [{"rank": 1, "candidate": "CUST-WP-0045"}],
    }
    fake_llm = FakeLLMClient(json.dumps(canned_report))
    monkeypatch.setattr(activities, "get_llm_client", lambda: fake_llm)

    instruction = {
        "id": "daily-triage-report",
        "trusted_fields": [],
        "model": "test-model",
        "prompt": "Run report.",
        "output_schema": "schemas/daily-triage-report.json",
        "review_required": False,
        "report_sinks": [{"type": "working-memory", "path": "/tmp"}],
    }
    payload = {"instructions": [instruction], "event": {}, "context": {}}

    result = await activities.evaluate_instructions(payload)

    assert result["task_specs"] == []

    reports = result["reports"]
    assert len(reports) == 1
    entry = reports[0]
    assert entry["output_validated"] is False
    assert entry["review_required"] is True
    assert entry["validation_error"] == "$.recommendations[0]: missing required property 'wsjf'"

    body = entry["report"]
    assert body["status"] == "validation_failed"
    assert body["partial_summary"] == "Partial triage."
@pytest.mark.asyncio
async def test_evaluate_instructions_without_llm_client_returns_no_tasks(monkeypatch) -> None:
    """Check that a failing LLM client yields empty task specs and reports.

    The patched client's ``complete`` raises ``RuntimeError``; the test expects
    the activity to return ``{"task_specs": [], "reports": []}``.
    """

    class RaisingClient:
        """Client stand-in whose ``complete`` always fails."""

        def complete(
            self,
            prompt: str,
            model: str = "",
            config: dict | None = None,
        ) -> str:  # noqa: ARG002
            raise RuntimeError("not configured")

    monkeypatch.setattr(activities, "get_llm_client", lambda: RaisingClient())

    instruction = {
        "id": "daily-triage",
        "trusted_fields": [],
        "model": "test-model",
        "prompt": "Run triage.",
        "output_schema": "schemas/daily-triage-report.json",
    }
    payload = {"instructions": [instruction], "event": {}, "context": {}}

    outcome = await activities.evaluate_instructions(payload)

    expected_outcome = {"task_specs": [], "reports": []}
    assert outcome == expected_outcome
@pytest.mark.asyncio
async def test_evaluate_instructions_forwards_llm_connect_depth_config(monkeypatch) -> None:
    """Check the config dict handed to the LLM client for a tuned instruction.

    The instruction sets ``temperature``, ``max_tokens``, ``max_depth`` and
    ``model_params``; the test expects the first recorded call's config to
    carry those values, the model name under ``model_name``, and a
    ``json_schema`` entry inside ``model_params``.
    """
    fake_llm = FakeLLMClient(json.dumps({"summary": "ok", "recommendations": []}))
    monkeypatch.setattr(activities, "get_llm_client", lambda: fake_llm)

    instruction = {
        "id": "daily-triage-report",
        "trusted_fields": [],
        "model": "custodian-triage-balanced",
        "temperature": 0.2,
        "max_tokens": 1200,
        "max_depth": 2,
        "model_params": {"reasoning_effort": "medium"},
        "prompt": "Run report.",
        "output_schema": "schemas/daily-triage-report.json",
        "review_required": False,
    }
    payload = {"instructions": [instruction], "event": {}, "context": {}}

    await activities.evaluate_instructions(payload)

    expected_schema = {
        "type": "object",
        "required": ["summary", "recommendations"],
        "properties": {
            "summary": {"type": "string"},
            "recommendations": {
                "type": "array",
                "items": {"type": "object"},
            },
        },
    }
    expected_config = {
        "model_name": "custodian-triage-balanced",
        "temperature": 0.2,
        "max_tokens": 1200,
        "max_depth": 2,
        "model_params": {
            "reasoning_effort": "medium",
            "json_schema": expected_schema,
        },
    }

    # Each recorded call is (prompt, model, config); only config is checked.
    _prompt, _model, sent_config = fake_llm.calls[0]
    assert sent_config == expected_config