generated from coulomb/repo-seed
Wire instruction report execution
This commit is contained in:
15
schemas/daily-triage-report.json
Normal file
15
schemas/daily-triage-report.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"type": "object",
|
||||
"required": ["summary", "recommendations"],
|
||||
"properties": {
|
||||
"summary": {
|
||||
"type": "string"
|
||||
},
|
||||
"recommendations": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -25,6 +25,9 @@ from activity_core.issue_sink import get_issue_sink
|
||||
from activity_core.orm import ActivityDefinition as ActivityDefinitionRow
|
||||
from activity_core.orm import ActivityRun, TaskInstance, TaskSpawnLog
|
||||
from activity_core.rules import evaluate_condition
|
||||
from activity_core.llm_client import get_llm_client
|
||||
from activity_core.models import InstructionDef
|
||||
from activity_core.rules.executor import execute_instruction_with_audit
|
||||
|
||||
|
||||
_session_factory: async_sessionmaker[AsyncSession] | None = None
|
||||
@@ -267,6 +270,75 @@ async def evaluate_rules(payload: dict) -> list[dict]:
|
||||
return matched
|
||||
|
||||
|
||||
@activity.defn
|
||||
async def evaluate_instructions(payload: dict) -> dict:
|
||||
"""Evaluate instruction blocks and return task specs/reports with audit fields.
|
||||
|
||||
Expected keys in payload:
|
||||
instructions list[dict] — InstructionDef serialised dicts
|
||||
event dict — EventEnvelope attributes (or empty for cron)
|
||||
context dict — context snapshot from resolve_context
|
||||
"""
|
||||
instructions = payload.get("instructions", [])
|
||||
event_attrs = payload.get("event", {})
|
||||
context = payload.get("context", {})
|
||||
llm_client = get_llm_client()
|
||||
|
||||
class _Env:
|
||||
def __init__(self, attrs: dict) -> None:
|
||||
self.attributes = _DictObj(attrs)
|
||||
|
||||
class _DictObj:
|
||||
def __init__(self, d: dict) -> None:
|
||||
self.__dict__.update(d)
|
||||
|
||||
event_obj = _Env(event_attrs)
|
||||
|
||||
task_specs: list[dict] = []
|
||||
reports: list[dict] = []
|
||||
for raw_instruction in instructions:
|
||||
try:
|
||||
instruction = InstructionDef.model_validate(raw_instruction)
|
||||
except Exception as exc:
|
||||
activity.logger.warning("instruction definition invalid — %s", exc)
|
||||
continue
|
||||
|
||||
result = execute_instruction_with_audit(
|
||||
instruction,
|
||||
event_obj,
|
||||
context,
|
||||
llm_client,
|
||||
)
|
||||
if result.report is not None:
|
||||
reports.append({
|
||||
"instruction_id": instruction.id,
|
||||
"report": result.report,
|
||||
"condition": result.condition_matched,
|
||||
"prompt_hash": result.prompt_hash,
|
||||
"model": result.model,
|
||||
"output_validated": result.output_validated,
|
||||
"review_required": result.review_required,
|
||||
})
|
||||
for spec in result.tasks:
|
||||
task_specs.append({
|
||||
"title": spec.title,
|
||||
"description": spec.description,
|
||||
"target_repo": spec.target_repo,
|
||||
"priority": spec.priority,
|
||||
"labels": spec.labels,
|
||||
"due_in_days": spec.due_in_days,
|
||||
"source_type": "instruction",
|
||||
"source_id": instruction.id,
|
||||
"condition": result.condition_matched,
|
||||
"prompt_hash": result.prompt_hash,
|
||||
"model": result.model,
|
||||
"output_validated": result.output_validated,
|
||||
"review_required": result.review_required,
|
||||
})
|
||||
|
||||
return {"task_specs": task_specs, "reports": reports}
|
||||
|
||||
|
||||
@activity.defn
|
||||
async def emit_tasks(payload: dict) -> list[str]:
|
||||
"""Emit TaskSpecs to IssueSink and write task_spawn_log rows.
|
||||
@@ -316,6 +388,10 @@ async def emit_tasks(payload: dict) -> list[str]:
|
||||
triggering_event_id=triggering_event_id,
|
||||
task_ref=ref.external_id,
|
||||
condition_matched=spec_dict.get("condition"),
|
||||
prompt_hash=spec_dict.get("prompt_hash"),
|
||||
model=spec_dict.get("model"),
|
||||
output_validated=spec_dict.get("output_validated"),
|
||||
review_required=spec_dict.get("review_required"),
|
||||
)
|
||||
session.add(log_row)
|
||||
except Exception as exc:
|
||||
|
||||
57
src/activity_core/llm_client.py
Normal file
57
src/activity_core/llm_client.py
Normal file
@@ -0,0 +1,57 @@
|
||||
"""llm-connect adapter for instruction execution.
|
||||
|
||||
activity-core deliberately talks to llm-connect over its small HTTP surface
|
||||
instead of importing provider-specific SDKs. This keeps the activity worker on
|
||||
owned infrastructure while leaving provider selection, API keys, and model
|
||||
routing behind the existing llm-connect boundary.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
|
||||
class DisabledLLMClient:
|
||||
"""LLM client used when no llm-connect endpoint is configured."""
|
||||
|
||||
def complete(self, prompt: str, model: str = "") -> str: # noqa: ARG002
|
||||
raise RuntimeError("LLM_CONNECT_URL is not configured")
|
||||
|
||||
|
||||
class LLMConnectClient:
|
||||
"""Small synchronous client for llm-connect server mode."""
|
||||
|
||||
def __init__(self, base_url: str, timeout_seconds: float = 300.0) -> None:
|
||||
self.base_url = base_url.rstrip("/")
|
||||
self.timeout_seconds = timeout_seconds
|
||||
|
||||
def complete(self, prompt: str, model: str = "") -> str:
|
||||
payload: dict[str, Any] = {
|
||||
"prompt": prompt,
|
||||
"config": {
|
||||
"model_name": model,
|
||||
"timeout_seconds": int(self.timeout_seconds),
|
||||
},
|
||||
}
|
||||
resp = httpx.post(
|
||||
f"{self.base_url}/execute",
|
||||
json=payload,
|
||||
timeout=self.timeout_seconds,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
content = data.get("content")
|
||||
if not isinstance(content, str):
|
||||
raise ValueError("llm-connect response missing string content")
|
||||
return content
|
||||
|
||||
|
||||
def get_llm_client() -> DisabledLLMClient | LLMConnectClient:
|
||||
base_url = os.environ.get("LLM_CONNECT_URL", "").strip()
|
||||
if not base_url:
|
||||
return DisabledLLMClient()
|
||||
timeout = float(os.environ.get("LLM_CONNECT_TIMEOUT_SECONDS", "300"))
|
||||
return LLMConnectClient(base_url, timeout)
|
||||
@@ -11,6 +11,8 @@ import hashlib
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from activity_core.rules.evaluator import UnsafeExpression, evaluate_condition
|
||||
@@ -26,6 +28,19 @@ class UntrustedFieldError(ValueError):
|
||||
"""Raised when a prompt placeholder references a field not in trusted_fields."""
|
||||
|
||||
|
||||
@dataclass
|
||||
class InstructionResult:
|
||||
"""Instruction output plus audit metadata for workflow integration."""
|
||||
|
||||
tasks: list[TaskSpec]
|
||||
report: dict[str, Any] | None = None
|
||||
prompt_hash: str | None = None
|
||||
model: str | None = None
|
||||
output_validated: bool = False
|
||||
review_required: bool = False
|
||||
condition_matched: str | None = None
|
||||
|
||||
|
||||
def _resolve_path(obj: Any, path: str) -> Any:
|
||||
"""Walk a dot-separated path on obj or dict. Returns None if not found."""
|
||||
parts = path.split(".")
|
||||
@@ -92,14 +107,24 @@ def execute_instruction(
|
||||
4. Validate response against instr.output_schema (JSON Schema). Retry once.
|
||||
5. Return list[TaskSpec].
|
||||
"""
|
||||
return execute_instruction_with_audit(instr, event, context, llm_client).tasks
|
||||
|
||||
|
||||
def execute_instruction_with_audit(
|
||||
instr: Any,
|
||||
event: Any,
|
||||
context: dict,
|
||||
llm_client: Any,
|
||||
) -> InstructionResult:
|
||||
"""Evaluate an Instruction and return task specs plus audit metadata."""
|
||||
try:
|
||||
return _execute(instr, event, context, llm_client)
|
||||
except UntrustedFieldError as exc:
|
||||
logger.warning("instruction %r rejected — %s", instr.id, exc)
|
||||
return []
|
||||
return _empty_result(instr)
|
||||
except Exception as exc:
|
||||
logger.warning("instruction %r failed — %s", instr.id, exc)
|
||||
return []
|
||||
return _empty_result(instr)
|
||||
|
||||
|
||||
def _execute(
|
||||
@@ -107,14 +132,14 @@ def _execute(
|
||||
event: Any,
|
||||
context: dict,
|
||||
llm_client: Any,
|
||||
) -> list[TaskSpec]:
|
||||
) -> InstructionResult:
|
||||
# Step 1 — pre-filter
|
||||
try:
|
||||
if instr.condition and not evaluate_condition(instr.condition, event, context):
|
||||
return []
|
||||
return _empty_result(instr)
|
||||
except UnsafeExpression as exc:
|
||||
logger.warning("instruction %r condition is unsafe — %s", instr.id, exc)
|
||||
return []
|
||||
return _empty_result(instr)
|
||||
|
||||
# Step 2 — render prompt (raises UntrustedFieldError on policy violation)
|
||||
rendered = _render_prompt(instr.prompt, instr.trusted_fields, event, context)
|
||||
@@ -124,34 +149,87 @@ def _execute(
|
||||
raw_output = llm_client.complete(rendered, model=instr.model)
|
||||
|
||||
# Step 4 — validate and optionally retry
|
||||
task_specs, error = _validate_output(raw_output, instr)
|
||||
task_specs, report, error = _validate_output(raw_output, instr)
|
||||
if error:
|
||||
retry_prompt = rendered + f"\n\nPrevious output was invalid: {error}\nPlease fix."
|
||||
raw_output = llm_client.complete(retry_prompt, model=instr.model)
|
||||
task_specs, error = _validate_output(raw_output, instr)
|
||||
task_specs, report, error = _validate_output(raw_output, instr)
|
||||
if error:
|
||||
logger.warning(
|
||||
"instruction_output_error: instruction=%r, prompt_hash=%s, error=%s",
|
||||
instr.id, prompt_hash, error,
|
||||
)
|
||||
return []
|
||||
return _empty_result(instr, prompt_hash=prompt_hash)
|
||||
|
||||
return task_specs
|
||||
return InstructionResult(
|
||||
tasks=task_specs,
|
||||
report=report,
|
||||
prompt_hash=prompt_hash,
|
||||
model=instr.model,
|
||||
output_validated=True,
|
||||
review_required=bool(getattr(instr, "review_required", False)),
|
||||
condition_matched=instr.condition or None,
|
||||
)
|
||||
|
||||
|
||||
def _validate_output(raw_output: Any, instr: Any) -> tuple[list[TaskSpec], str | None]:
|
||||
"""Parse raw LLM output into TaskSpec list. Returns (specs, error_message)."""
|
||||
def _empty_result(instr: Any, prompt_hash: str | None = None) -> InstructionResult:
|
||||
return InstructionResult(
|
||||
tasks=[],
|
||||
prompt_hash=prompt_hash,
|
||||
model=getattr(instr, "model", None),
|
||||
output_validated=False,
|
||||
review_required=bool(getattr(instr, "review_required", False)),
|
||||
condition_matched=getattr(instr, "condition", "") or None,
|
||||
)
|
||||
|
||||
|
||||
def _validate_output(
|
||||
raw_output: Any,
|
||||
instr: Any,
|
||||
) -> tuple[list[TaskSpec], dict[str, Any] | None, str | None]:
|
||||
"""Parse raw LLM output into TaskSpecs and optional report payload.
|
||||
|
||||
Accepted shapes:
|
||||
- list[task]
|
||||
- single task dict with title/description/etc.
|
||||
- {"tasks": [...], "report": {...}}
|
||||
- report-only dict, such as {"summary": "...", "recommendations": [...]}
|
||||
|
||||
Returns (specs, report, error_message).
|
||||
"""
|
||||
try:
|
||||
if isinstance(raw_output, str):
|
||||
data = json.loads(raw_output)
|
||||
else:
|
||||
data = raw_output
|
||||
|
||||
if not isinstance(data, list):
|
||||
data = [data]
|
||||
schema_error = _validate_against_schema(data, getattr(instr, "output_schema", ""))
|
||||
if schema_error:
|
||||
return [], None, schema_error
|
||||
|
||||
report: dict[str, Any] | None = None
|
||||
task_items: list[Any]
|
||||
if isinstance(data, dict) and ("tasks" in data or "report" in data):
|
||||
maybe_report = data.get("report")
|
||||
if maybe_report is not None and not isinstance(maybe_report, dict):
|
||||
return [], None, "report must be a JSON object"
|
||||
report = maybe_report
|
||||
tasks = data.get("tasks", [])
|
||||
if not isinstance(tasks, list):
|
||||
return [], None, "tasks must be a JSON array"
|
||||
task_items = tasks
|
||||
elif isinstance(data, dict) and "title" not in data:
|
||||
report = data
|
||||
task_items = []
|
||||
elif isinstance(data, list):
|
||||
task_items = data
|
||||
else:
|
||||
task_items = [data]
|
||||
|
||||
specs = []
|
||||
for item in data:
|
||||
for item in task_items:
|
||||
if not isinstance(item, dict):
|
||||
return [], None, "each task must be a JSON object"
|
||||
specs.append(TaskSpec(
|
||||
title=item.get("title", ""),
|
||||
description=item.get("description", ""),
|
||||
@@ -162,6 +240,70 @@ def _validate_output(raw_output: Any, instr: Any) -> tuple[list[TaskSpec], str |
|
||||
source_type="instruction",
|
||||
source_id=instr.id,
|
||||
))
|
||||
return specs, None
|
||||
return specs, report, None
|
||||
except (json.JSONDecodeError, AttributeError, KeyError, TypeError) as exc:
|
||||
return [], str(exc)
|
||||
return [], None, str(exc)
|
||||
|
||||
|
||||
def _validate_against_schema(data: Any, schema_path: str) -> str | None:
|
||||
if not schema_path:
|
||||
return None
|
||||
|
||||
path = Path(schema_path)
|
||||
if not path.exists():
|
||||
return None
|
||||
|
||||
try:
|
||||
schema = json.loads(path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError) as exc:
|
||||
return f"could not read output schema: {exc}"
|
||||
|
||||
return _validate_schema_node(data, schema, "$")
|
||||
|
||||
|
||||
def _validate_schema_node(data: Any, schema: dict[str, Any], path: str) -> str | None:
|
||||
expected_type = schema.get("type")
|
||||
if expected_type and not _matches_type(data, expected_type):
|
||||
return f"{path}: expected {expected_type}"
|
||||
|
||||
if expected_type == "object":
|
||||
required = schema.get("required", [])
|
||||
if isinstance(required, list):
|
||||
for key in required:
|
||||
if isinstance(key, str) and key not in data:
|
||||
return f"{path}: missing required property {key!r}"
|
||||
properties = schema.get("properties", {})
|
||||
if isinstance(properties, dict):
|
||||
for key, child_schema in properties.items():
|
||||
if key in data and isinstance(child_schema, dict):
|
||||
error = _validate_schema_node(data[key], child_schema, f"{path}.{key}")
|
||||
if error:
|
||||
return error
|
||||
|
||||
if expected_type == "array":
|
||||
item_schema = schema.get("items")
|
||||
if isinstance(item_schema, dict):
|
||||
for index, item in enumerate(data):
|
||||
error = _validate_schema_node(item, item_schema, f"{path}[{index}]")
|
||||
if error:
|
||||
return error
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _matches_type(data: Any, expected_type: str) -> bool:
|
||||
if expected_type == "object":
|
||||
return isinstance(data, dict)
|
||||
if expected_type == "array":
|
||||
return isinstance(data, list)
|
||||
if expected_type == "string":
|
||||
return isinstance(data, str)
|
||||
if expected_type == "integer":
|
||||
return isinstance(data, int) and not isinstance(data, bool)
|
||||
if expected_type == "number":
|
||||
return isinstance(data, (int, float)) and not isinstance(data, bool)
|
||||
if expected_type == "boolean":
|
||||
return isinstance(data, bool)
|
||||
if expected_type == "null":
|
||||
return data is None
|
||||
return True
|
||||
|
||||
@@ -21,6 +21,7 @@ with workflow.unsafe.imports_passed_through():
|
||||
from activity_core.activities import (
|
||||
emit_tasks,
|
||||
evaluate_rules,
|
||||
evaluate_instructions,
|
||||
load_activity_definition,
|
||||
log_run,
|
||||
persist_task_instance,
|
||||
@@ -136,6 +137,19 @@ class RunActivityWorkflow:
|
||||
"condition": rule.get("condition", ""),
|
||||
})
|
||||
|
||||
if defn.get("instructions"):
|
||||
instruction_result: dict = await workflow.execute_activity(
|
||||
evaluate_instructions,
|
||||
{
|
||||
"instructions": defn.get("instructions", []),
|
||||
"event": event_attrs,
|
||||
"context": context_snapshot,
|
||||
},
|
||||
start_to_close_timeout=_ACTIVITY_TIMEOUT,
|
||||
retry_policy=_RETRY_POLICY,
|
||||
)
|
||||
task_spec_dicts.extend(instruction_result.get("task_specs", []))
|
||||
|
||||
# ── 4. Emit tasks via IssueSink ───────────────────────────────────────
|
||||
if trigger_key == SCHEDULED_TRIGGER_KEY:
|
||||
dedup_source = workflow.info().workflow_id
|
||||
|
||||
@@ -21,6 +21,7 @@ from activity_core.rules.executor import (
|
||||
UntrustedFieldError,
|
||||
_render_prompt,
|
||||
execute_instruction,
|
||||
execute_instruction_with_audit,
|
||||
)
|
||||
|
||||
|
||||
@@ -201,6 +202,82 @@ def test_valid_llm_output_returns_task_spec():
|
||||
assert result[0].source_type == "instruction"
|
||||
|
||||
|
||||
def test_execute_instruction_with_audit_returns_metadata():
|
||||
task_data = [{"title": "Run triage", "priority": "high"}]
|
||||
llm = _CountingLLM([json.dumps(task_data)])
|
||||
instr = _instr(
|
||||
id="daily-triage",
|
||||
condition="",
|
||||
prompt="Check State Hub.",
|
||||
trusted_fields=[],
|
||||
model="test-model",
|
||||
review_required=True,
|
||||
)
|
||||
|
||||
result = execute_instruction_with_audit(instr, _Event(), {}, llm)
|
||||
|
||||
assert len(result.tasks) == 1
|
||||
assert result.tasks[0].source_id == "daily-triage"
|
||||
assert result.prompt_hash is not None
|
||||
assert len(result.prompt_hash) == 64
|
||||
assert result.model == "test-model"
|
||||
assert result.output_validated is True
|
||||
assert result.review_required is True
|
||||
|
||||
|
||||
def test_execute_instruction_with_audit_accepts_report_payload():
|
||||
report_data = {
|
||||
"summary": "State Hub has loose ends.",
|
||||
"recommendations": [{"action": "revisit", "candidate": "CUST-WP-0045"}],
|
||||
}
|
||||
llm = _CountingLLM([json.dumps(report_data)])
|
||||
instr = _instr(
|
||||
id="daily-triage-report",
|
||||
prompt="Report.",
|
||||
trusted_fields=[],
|
||||
output_schema="schemas/daily-triage-report.json",
|
||||
)
|
||||
|
||||
result = execute_instruction_with_audit(instr, _Event(), {}, llm)
|
||||
|
||||
assert result.tasks == []
|
||||
assert result.report == report_data
|
||||
assert result.output_validated is True
|
||||
|
||||
|
||||
def test_execute_instruction_with_audit_rejects_invalid_report_schema():
|
||||
report_data = {"summary": "Missing recommendations."}
|
||||
llm = _CountingLLM([json.dumps(report_data), json.dumps(report_data)])
|
||||
instr = _instr(
|
||||
id="daily-triage-report",
|
||||
prompt="Report.",
|
||||
trusted_fields=[],
|
||||
output_schema="schemas/daily-triage-report.json",
|
||||
)
|
||||
|
||||
result = execute_instruction_with_audit(instr, _Event(), {}, llm)
|
||||
|
||||
assert result.tasks == []
|
||||
assert result.report is None
|
||||
assert result.output_validated is False
|
||||
assert llm.call_count == 2
|
||||
|
||||
|
||||
def test_execute_instruction_with_audit_accepts_report_and_tasks_envelope():
|
||||
envelope = {
|
||||
"report": {"summary": "Review needed."},
|
||||
"tasks": [{"title": "Inspect CUST-WP-0045"}],
|
||||
}
|
||||
llm = _CountingLLM([json.dumps(envelope)])
|
||||
instr = _instr(id="daily-triage-report", prompt="Report.", trusted_fields=[])
|
||||
|
||||
result = execute_instruction_with_audit(instr, _Event(), {}, llm)
|
||||
|
||||
assert result.report == {"summary": "Review needed."}
|
||||
assert len(result.tasks) == 1
|
||||
assert result.tasks[0].title == "Inspect CUST-WP-0045"
|
||||
|
||||
|
||||
# ── Condition pre-filter ───────────────────────────────────────────────────────
|
||||
|
||||
def test_condition_false_skips_llm():
|
||||
|
||||
116
tests/test_instruction_evaluation.py
Normal file
116
tests/test_instruction_evaluation.py
Normal file
@@ -0,0 +1,116 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from activity_core import activities
|
||||
|
||||
|
||||
class FakeLLMClient:
|
||||
def __init__(self, response: str) -> None:
|
||||
self.response = response
|
||||
self.calls: list[tuple[str, str]] = []
|
||||
|
||||
def complete(self, prompt: str, model: str = "") -> str:
|
||||
self.calls.append((prompt, model))
|
||||
return self.response
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_evaluate_instructions_returns_task_specs_with_audit(monkeypatch) -> None:
|
||||
llm = FakeLLMClient(json.dumps([
|
||||
{
|
||||
"title": "Run daily triage",
|
||||
"description": "Review State Hub loose ends.",
|
||||
"priority": "high",
|
||||
"labels": ["triage"],
|
||||
}
|
||||
]))
|
||||
monkeypatch.setattr(activities, "get_llm_client", lambda: llm)
|
||||
|
||||
result = await activities.evaluate_instructions({
|
||||
"instructions": [
|
||||
{
|
||||
"id": "daily-triage",
|
||||
"trusted_fields": ["context.summary.open_tasks"],
|
||||
"model": "test-model",
|
||||
"prompt": "Open tasks: {context.summary.open_tasks}",
|
||||
"output_schema": "",
|
||||
"review_required": False,
|
||||
}
|
||||
],
|
||||
"event": {},
|
||||
"context": {"summary": {"open_tasks": 3}},
|
||||
})
|
||||
|
||||
task_specs = result["task_specs"]
|
||||
assert len(task_specs) == 1
|
||||
spec = task_specs[0]
|
||||
assert spec["title"] == "Run daily triage"
|
||||
assert spec["source_type"] == "instruction"
|
||||
assert spec["source_id"] == "daily-triage"
|
||||
assert spec["model"] == "test-model"
|
||||
assert spec["output_validated"] is True
|
||||
assert spec["review_required"] is False
|
||||
assert spec["prompt_hash"] is not None
|
||||
assert len(spec["prompt_hash"]) == 64
|
||||
assert result["reports"] == []
|
||||
assert llm.calls == [("Open tasks: 3", "test-model")]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_evaluate_instructions_returns_report_payload(monkeypatch) -> None:
|
||||
llm = FakeLLMClient(json.dumps({
|
||||
"summary": "State Hub has open loose ends.",
|
||||
"recommendations": [{"candidate": "CUST-WP-0045", "action": "work-next"}],
|
||||
}))
|
||||
monkeypatch.setattr(activities, "get_llm_client", lambda: llm)
|
||||
|
||||
result = await activities.evaluate_instructions({
|
||||
"instructions": [
|
||||
{
|
||||
"id": "daily-triage-report",
|
||||
"trusted_fields": [],
|
||||
"model": "test-model",
|
||||
"prompt": "Run report.",
|
||||
"output_schema": "schemas/daily-triage-report.json",
|
||||
"review_required": False,
|
||||
}
|
||||
],
|
||||
"event": {},
|
||||
"context": {},
|
||||
})
|
||||
|
||||
assert result["task_specs"] == []
|
||||
assert len(result["reports"]) == 1
|
||||
report = result["reports"][0]
|
||||
assert report["instruction_id"] == "daily-triage-report"
|
||||
assert report["report"]["summary"] == "State Hub has open loose ends."
|
||||
assert report["output_validated"] is True
|
||||
assert report["prompt_hash"] is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_evaluate_instructions_without_llm_client_returns_no_tasks(monkeypatch) -> None:
|
||||
class RaisingClient:
|
||||
def complete(self, prompt: str, model: str = "") -> str: # noqa: ARG002
|
||||
raise RuntimeError("not configured")
|
||||
|
||||
monkeypatch.setattr(activities, "get_llm_client", lambda: RaisingClient())
|
||||
|
||||
result = await activities.evaluate_instructions({
|
||||
"instructions": [
|
||||
{
|
||||
"id": "daily-triage",
|
||||
"trusted_fields": [],
|
||||
"model": "test-model",
|
||||
"prompt": "Run triage.",
|
||||
"output_schema": "schemas/daily-triage-report.json",
|
||||
}
|
||||
],
|
||||
"event": {},
|
||||
"context": {},
|
||||
})
|
||||
|
||||
assert result == {"task_specs": [], "reports": []}
|
||||
Reference in New Issue
Block a user