Wire instruction report execution

This commit is contained in:
2026-05-19 18:28:23 +02:00
parent 0e7084207e
commit 0dc342eb1b
7 changed files with 513 additions and 16 deletions

View File

@@ -0,0 +1,15 @@
{
"type": "object",
"required": ["summary", "recommendations"],
"properties": {
"summary": {
"type": "string"
},
"recommendations": {
"type": "array",
"items": {
"type": "object"
}
}
}
}

View File

@@ -25,6 +25,9 @@ from activity_core.issue_sink import get_issue_sink
from activity_core.orm import ActivityDefinition as ActivityDefinitionRow
from activity_core.orm import ActivityRun, TaskInstance, TaskSpawnLog
from activity_core.rules import evaluate_condition
from activity_core.llm_client import get_llm_client
from activity_core.models import InstructionDef
from activity_core.rules.executor import execute_instruction_with_audit
_session_factory: async_sessionmaker[AsyncSession] | None = None
@@ -267,6 +270,75 @@ async def evaluate_rules(payload: dict) -> list[dict]:
return matched
@activity.defn
async def evaluate_instructions(payload: dict) -> dict:
    """Evaluate instruction blocks and return task specs/reports with audit fields.

    Expected keys in payload:
      instructions  list[dict] — InstructionDef serialised dicts
      event         dict — EventEnvelope attributes (or empty for cron)
      context       dict — context snapshot from resolve_context

    Returns:
        ``{"task_specs": [...], "reports": [...]}``. Every entry carries the
        audit fields ``condition``, ``prompt_hash``, ``model``,
        ``output_validated`` and ``review_required`` taken from the
        instruction's result. Instructions whose definition fails validation
        are logged and skipped.
    """
    # Function-scope import: keeps this activity self-contained without
    # touching the module's import block.
    import asyncio

    instructions = payload.get("instructions", [])
    event_attrs = payload.get("event", {})
    context = payload.get("context", {})
    llm_client = get_llm_client()

    class _DictObj:
        """Expose the top-level keys of a dict as attributes."""

        def __init__(self, d: dict) -> None:
            self.__dict__.update(d)

    class _Env:
        """Minimal event stand-in exposing ``.attributes.<name>``."""

        def __init__(self, attrs: dict) -> None:
            self.attributes = _DictObj(attrs)

    event_obj = _Env(event_attrs)
    task_specs: list[dict] = []
    reports: list[dict] = []

    for raw_instruction in instructions:
        try:
            instruction = InstructionDef.model_validate(raw_instruction)
        except Exception as exc:
            activity.logger.warning("instruction definition invalid — %s", exc)
            continue

        # The instruction executor calls the LLM client synchronously; run it
        # in a worker thread so a slow completion does not stall the event
        # loop that this async activity shares with other work.
        result = await asyncio.to_thread(
            execute_instruction_with_audit,
            instruction,
            event_obj,
            context,
            llm_client,
        )

        # Audit fields are identical for the report and for every task spec
        # produced by the same instruction.
        audit = {
            "condition": result.condition_matched,
            "prompt_hash": result.prompt_hash,
            "model": result.model,
            "output_validated": result.output_validated,
            "review_required": result.review_required,
        }

        if result.report is not None:
            reports.append({
                "instruction_id": instruction.id,
                "report": result.report,
                **audit,
            })

        for spec in result.tasks:
            task_specs.append({
                "title": spec.title,
                "description": spec.description,
                "target_repo": spec.target_repo,
                "priority": spec.priority,
                "labels": spec.labels,
                "due_in_days": spec.due_in_days,
                "source_type": "instruction",
                "source_id": instruction.id,
                **audit,
            })

    return {"task_specs": task_specs, "reports": reports}
@activity.defn
async def emit_tasks(payload: dict) -> list[str]:
"""Emit TaskSpecs to IssueSink and write task_spawn_log rows.
@@ -316,6 +388,10 @@ async def emit_tasks(payload: dict) -> list[str]:
triggering_event_id=triggering_event_id,
task_ref=ref.external_id,
condition_matched=spec_dict.get("condition"),
prompt_hash=spec_dict.get("prompt_hash"),
model=spec_dict.get("model"),
output_validated=spec_dict.get("output_validated"),
review_required=spec_dict.get("review_required"),
)
session.add(log_row)
except Exception as exc:

View File

@@ -0,0 +1,57 @@
"""llm-connect adapter for instruction execution.
activity-core deliberately talks to llm-connect over its small HTTP surface
instead of importing provider-specific SDKs. This keeps the activity worker on
owned infrastructure while leaving provider selection, API keys, and model
routing behind the existing llm-connect boundary.
"""
from __future__ import annotations
import os
from typing import Any
import httpx
class DisabledLLMClient:
    """Stand-in client returned when no llm-connect endpoint is configured."""

    def complete(self, prompt: str, model: str = "") -> str:  # noqa: ARG002
        """Reject every completion request with a RuntimeError."""
        reason = "LLM_CONNECT_URL is not configured"
        raise RuntimeError(reason)
class LLMConnectClient:
    """Small synchronous client for llm-connect server mode."""

    def __init__(self, base_url: str, timeout_seconds: float = 300.0) -> None:
        """Store the endpoint (without trailing slashes) and request timeout."""
        self.base_url = base_url.rstrip("/")
        self.timeout_seconds = timeout_seconds

    def complete(self, prompt: str, model: str = "") -> str:
        """POST the prompt to ``{base_url}/execute`` and return its ``content``.

        Raises:
            httpx.HTTPStatusError: the server answered with an error status.
            ValueError: the response body is not a JSON object with a string
                ``content`` field.
        """
        payload: dict[str, Any] = {
            "prompt": prompt,
            "config": {
                "model_name": model,
                "timeout_seconds": int(self.timeout_seconds),
            },
        }
        resp = httpx.post(
            f"{self.base_url}/execute",
            json=payload,
            timeout=self.timeout_seconds,
        )
        resp.raise_for_status()
        return self._extract_content(resp.json())

    @staticmethod
    def _extract_content(data: Any) -> str:
        """Return the string ``content`` field of a decoded response body.

        A body that decodes to a list, string, number or null is rejected with
        ValueError rather than failing on ``.get`` with AttributeError.
        """
        if not isinstance(data, dict):
            raise ValueError("llm-connect response is not a JSON object")
        content = data.get("content")
        if not isinstance(content, str):
            raise ValueError("llm-connect response missing string content")
        return content
def get_llm_client() -> DisabledLLMClient | LLMConnectClient:
    """Build the LLM client selected by the environment.

    Reads ``LLM_CONNECT_URL``; when it is unset or blank a DisabledLLMClient is
    returned. Otherwise an LLMConnectClient is built with the timeout from
    ``LLM_CONNECT_TIMEOUT_SECONDS`` (default 300 seconds).

    Raises:
        ValueError: ``LLM_CONNECT_TIMEOUT_SECONDS`` is set to a non-numeric value.
    """
    base_url = os.environ.get("LLM_CONNECT_URL", "").strip()
    if not base_url:
        return DisabledLLMClient()
    # A variable declared but left empty (``VAR=``) reads as "", which float()
    # rejects; treat blank the same as unset and fall back to the default.
    raw_timeout = os.environ.get("LLM_CONNECT_TIMEOUT_SECONDS", "").strip() or "300"
    return LLMConnectClient(base_url, float(raw_timeout))

View File

@@ -11,6 +11,8 @@ import hashlib
import json
import logging
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from activity_core.rules.evaluator import UnsafeExpression, evaluate_condition
@@ -26,6 +28,19 @@ class UntrustedFieldError(ValueError):
"""Raised when a prompt placeholder references a field not in trusted_fields."""
@dataclass
class InstructionResult:
    """Instruction output plus audit metadata for workflow integration.

    Returned for every instruction execution, including skipped or failed
    ones, in which case ``tasks`` is empty and ``output_validated`` is False.
    """

    # Task specs parsed from the LLM output; empty when nothing was produced.
    tasks: list[TaskSpec]
    # Optional report payload parsed from the LLM output.
    report: dict[str, Any] | None = None
    # Hash of the prompt sent to the LLM; None when the run ended before one
    # was available (the hashing itself happens outside this class).
    prompt_hash: str | None = None
    # Model name taken from the instruction.
    model: str | None = None
    # True only when the LLM output passed validation.
    output_validated: bool = False
    # Mirrors the instruction's review_required flag.
    review_required: bool = False
    # The instruction's condition expression, or None when it has none.
    # NOTE: this is also populated on empty results, so it does not by itself
    # prove that the condition evaluated to true.
    condition_matched: str | None = None
def _resolve_path(obj: Any, path: str) -> Any:
"""Walk a dot-separated path on obj or dict. Returns None if not found."""
parts = path.split(".")
@@ -92,14 +107,24 @@ def execute_instruction(
4. Validate response against instr.output_schema (JSON Schema). Retry once.
5. Return list[TaskSpec].
"""
return execute_instruction_with_audit(instr, event, context, llm_client).tasks
def execute_instruction_with_audit(
    instr: Any,
    event: Any,
    context: dict,
    llm_client: Any,
) -> InstructionResult:
    """Evaluate an Instruction and return task specs plus audit metadata.

    Exceptions raised during execution are logged as warnings and converted
    into an empty InstructionResult, so callers always receive an object with
    ``.tasks`` and the audit fields rather than a bare list.
    """
    try:
        return _execute(instr, event, context, llm_client)
    except UntrustedFieldError as exc:
        # Prompt referenced a field outside trusted_fields: policy rejection.
        logger.warning("instruction %r rejected — %s", instr.id, exc)
        return _empty_result(instr)
    except Exception as exc:
        # Best-effort boundary: one failing instruction must not abort the rest.
        logger.warning("instruction %r failed — %s", instr.id, exc)
        return _empty_result(instr)
def _execute(
@@ -107,14 +132,14 @@ def _execute(
event: Any,
context: dict,
llm_client: Any,
) -> list[TaskSpec]:
) -> InstructionResult:
# Step 1 — pre-filter
try:
if instr.condition and not evaluate_condition(instr.condition, event, context):
return []
return _empty_result(instr)
except UnsafeExpression as exc:
logger.warning("instruction %r condition is unsafe — %s", instr.id, exc)
return []
return _empty_result(instr)
# Step 2 — render prompt (raises UntrustedFieldError on policy violation)
rendered = _render_prompt(instr.prompt, instr.trusted_fields, event, context)
@@ -124,34 +149,87 @@ def _execute(
raw_output = llm_client.complete(rendered, model=instr.model)
# Step 4 — validate and optionally retry
task_specs, error = _validate_output(raw_output, instr)
task_specs, report, error = _validate_output(raw_output, instr)
if error:
retry_prompt = rendered + f"\n\nPrevious output was invalid: {error}\nPlease fix."
raw_output = llm_client.complete(retry_prompt, model=instr.model)
task_specs, error = _validate_output(raw_output, instr)
task_specs, report, error = _validate_output(raw_output, instr)
if error:
logger.warning(
"instruction_output_error: instruction=%r, prompt_hash=%s, error=%s",
instr.id, prompt_hash, error,
)
return []
return _empty_result(instr, prompt_hash=prompt_hash)
return task_specs
return InstructionResult(
tasks=task_specs,
report=report,
prompt_hash=prompt_hash,
model=instr.model,
output_validated=True,
review_required=bool(getattr(instr, "review_required", False)),
condition_matched=instr.condition or None,
)
def _validate_output(raw_output: Any, instr: Any) -> tuple[list[TaskSpec], str | None]:
"""Parse raw LLM output into TaskSpec list. Returns (specs, error_message)."""
def _empty_result(instr: Any, prompt_hash: str | None = None) -> InstructionResult:
    """Build the task-less, unvalidated result used for skipped or failed runs.

    Audit fields are read defensively from ``instr`` so that an instruction
    object lacking ``model``, ``review_required`` or ``condition`` still works.
    """
    model_name = getattr(instr, "model", None)
    needs_review = bool(getattr(instr, "review_required", False))
    # An empty condition string is reported as None.
    condition = getattr(instr, "condition", "") or None
    return InstructionResult(
        tasks=[],
        prompt_hash=prompt_hash,
        model=model_name,
        output_validated=False,
        review_required=needs_review,
        condition_matched=condition,
    )
def _validate_output(
raw_output: Any,
instr: Any,
) -> tuple[list[TaskSpec], dict[str, Any] | None, str | None]:
"""Parse raw LLM output into TaskSpecs and optional report payload.
Accepted shapes:
- list[task]
- single task dict with title/description/etc.
- {"tasks": [...], "report": {...}}
- report-only dict, such as {"summary": "...", "recommendations": [...]}
Returns (specs, report, error_message).
"""
try:
if isinstance(raw_output, str):
data = json.loads(raw_output)
else:
data = raw_output
if not isinstance(data, list):
data = [data]
schema_error = _validate_against_schema(data, getattr(instr, "output_schema", ""))
if schema_error:
return [], None, schema_error
report: dict[str, Any] | None = None
task_items: list[Any]
if isinstance(data, dict) and ("tasks" in data or "report" in data):
maybe_report = data.get("report")
if maybe_report is not None and not isinstance(maybe_report, dict):
return [], None, "report must be a JSON object"
report = maybe_report
tasks = data.get("tasks", [])
if not isinstance(tasks, list):
return [], None, "tasks must be a JSON array"
task_items = tasks
elif isinstance(data, dict) and "title" not in data:
report = data
task_items = []
elif isinstance(data, list):
task_items = data
else:
task_items = [data]
specs = []
for item in data:
for item in task_items:
if not isinstance(item, dict):
return [], None, "each task must be a JSON object"
specs.append(TaskSpec(
title=item.get("title", ""),
description=item.get("description", ""),
@@ -162,6 +240,70 @@ def _validate_output(raw_output: Any, instr: Any) -> tuple[list[TaskSpec], str |
source_type="instruction",
source_id=instr.id,
))
return specs, None
return specs, report, None
except (json.JSONDecodeError, AttributeError, KeyError, TypeError) as exc:
return [], str(exc)
return [], None, str(exc)
def _validate_against_schema(data: Any, schema_path: str) -> str | None:
if not schema_path:
return None
path = Path(schema_path)
if not path.exists():
return None
try:
schema = json.loads(path.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError) as exc:
return f"could not read output schema: {exc}"
return _validate_schema_node(data, schema, "$")
def _validate_schema_node(data: Any, schema: dict[str, Any], path: str) -> str | None:
expected_type = schema.get("type")
if expected_type and not _matches_type(data, expected_type):
return f"{path}: expected {expected_type}"
if expected_type == "object":
required = schema.get("required", [])
if isinstance(required, list):
for key in required:
if isinstance(key, str) and key not in data:
return f"{path}: missing required property {key!r}"
properties = schema.get("properties", {})
if isinstance(properties, dict):
for key, child_schema in properties.items():
if key in data and isinstance(child_schema, dict):
error = _validate_schema_node(data[key], child_schema, f"{path}.{key}")
if error:
return error
if expected_type == "array":
item_schema = schema.get("items")
if isinstance(item_schema, dict):
for index, item in enumerate(data):
error = _validate_schema_node(item, item_schema, f"{path}[{index}]")
if error:
return error
return None
def _matches_type(data: Any, expected_type: str) -> bool:
if expected_type == "object":
return isinstance(data, dict)
if expected_type == "array":
return isinstance(data, list)
if expected_type == "string":
return isinstance(data, str)
if expected_type == "integer":
return isinstance(data, int) and not isinstance(data, bool)
if expected_type == "number":
return isinstance(data, (int, float)) and not isinstance(data, bool)
if expected_type == "boolean":
return isinstance(data, bool)
if expected_type == "null":
return data is None
return True

View File

@@ -21,6 +21,7 @@ with workflow.unsafe.imports_passed_through():
from activity_core.activities import (
emit_tasks,
evaluate_rules,
evaluate_instructions,
load_activity_definition,
log_run,
persist_task_instance,
@@ -136,6 +137,19 @@ class RunActivityWorkflow:
"condition": rule.get("condition", ""),
})
if defn.get("instructions"):
instruction_result: dict = await workflow.execute_activity(
evaluate_instructions,
{
"instructions": defn.get("instructions", []),
"event": event_attrs,
"context": context_snapshot,
},
start_to_close_timeout=_ACTIVITY_TIMEOUT,
retry_policy=_RETRY_POLICY,
)
task_spec_dicts.extend(instruction_result.get("task_specs", []))
# ── 4. Emit tasks via IssueSink ───────────────────────────────────────
if trigger_key == SCHEDULED_TRIGGER_KEY:
dedup_source = workflow.info().workflow_id

View File

@@ -21,6 +21,7 @@ from activity_core.rules.executor import (
UntrustedFieldError,
_render_prompt,
execute_instruction,
execute_instruction_with_audit,
)
@@ -201,6 +202,82 @@ def test_valid_llm_output_returns_task_spec():
assert result[0].source_type == "instruction"
def test_execute_instruction_with_audit_returns_metadata():
    """A valid task list yields one TaskSpec plus the instruction's audit fields."""
    llm_output = json.dumps([{"title": "Run triage", "priority": "high"}])
    llm = _CountingLLM([llm_output])
    instr = _instr(
        id="daily-triage",
        condition="",
        prompt="Check State Hub.",
        trusted_fields=[],
        model="test-model",
        review_required=True,
    )

    outcome = execute_instruction_with_audit(instr, _Event(), {}, llm)

    # Task payload
    assert len(outcome.tasks) == 1
    assert outcome.tasks[0].source_id == "daily-triage"
    # Audit metadata
    assert outcome.prompt_hash is not None
    assert len(outcome.prompt_hash) == 64
    assert outcome.model == "test-model"
    assert outcome.output_validated is True
    assert outcome.review_required is True
def test_execute_instruction_with_audit_accepts_report_payload():
    """A report-only dict is returned as the report, with no tasks."""
    recommendation = {"action": "revisit", "candidate": "CUST-WP-0045"}
    report_data = {
        "summary": "State Hub has loose ends.",
        "recommendations": [recommendation],
    }
    llm = _CountingLLM([json.dumps(report_data)])
    instr = _instr(
        id="daily-triage-report",
        prompt="Report.",
        trusted_fields=[],
        output_schema="schemas/daily-triage-report.json",
    )

    outcome = execute_instruction_with_audit(instr, _Event(), {}, llm)

    assert outcome.tasks == []
    assert outcome.report == report_data
    assert outcome.output_validated is True
def test_execute_instruction_with_audit_rejects_invalid_report_schema():
    """A report missing a required property is rejected after one retry."""
    # The same invalid payload is served for the first call and the retry.
    invalid = json.dumps({"summary": "Missing recommendations."})
    llm = _CountingLLM([invalid, invalid])
    instr = _instr(
        id="daily-triage-report",
        prompt="Report.",
        trusted_fields=[],
        output_schema="schemas/daily-triage-report.json",
    )

    outcome = execute_instruction_with_audit(instr, _Event(), {}, llm)

    assert outcome.tasks == []
    assert outcome.report is None
    assert outcome.output_validated is False
    assert llm.call_count == 2
def test_execute_instruction_with_audit_accepts_report_and_tasks_envelope():
    """An envelope with both keys yields the report and the parsed tasks."""
    llm_output = json.dumps({
        "report": {"summary": "Review needed."},
        "tasks": [{"title": "Inspect CUST-WP-0045"}],
    })
    llm = _CountingLLM([llm_output])
    instr = _instr(id="daily-triage-report", prompt="Report.", trusted_fields=[])

    outcome = execute_instruction_with_audit(instr, _Event(), {}, llm)

    assert outcome.report == {"summary": "Review needed."}
    assert len(outcome.tasks) == 1
    assert outcome.tasks[0].title == "Inspect CUST-WP-0045"
# ── Condition pre-filter ───────────────────────────────────────────────────────
def test_condition_false_skips_llm():

View File

@@ -0,0 +1,116 @@
from __future__ import annotations
import json
import pytest
from activity_core import activities
class FakeLLMClient:
    """Test double that returns a canned response and records every call."""

    def __init__(self, response: str) -> None:
        self.response = response
        # One (prompt, model) tuple per complete() call, in call order.
        self.calls: list[tuple[str, str]] = []

    def complete(self, prompt: str, model: str = "") -> str:
        """Record the call and return the canned response."""
        recorded = (prompt, model)
        self.calls.append(recorded)
        return self.response
@pytest.mark.asyncio
async def test_evaluate_instructions_returns_task_specs_with_audit(monkeypatch) -> None:
    """Task output from the LLM comes back as task specs carrying audit fields."""
    llm_task = {
        "title": "Run daily triage",
        "description": "Review State Hub loose ends.",
        "priority": "high",
        "labels": ["triage"],
    }
    llm = FakeLLMClient(json.dumps([llm_task]))
    monkeypatch.setattr(activities, "get_llm_client", lambda: llm)

    instruction = {
        "id": "daily-triage",
        "trusted_fields": ["context.summary.open_tasks"],
        "model": "test-model",
        "prompt": "Open tasks: {context.summary.open_tasks}",
        "output_schema": "",
        "review_required": False,
    }
    payload = {
        "instructions": [instruction],
        "event": {},
        "context": {"summary": {"open_tasks": 3}},
    }

    result = await activities.evaluate_instructions(payload)

    task_specs = result["task_specs"]
    assert len(task_specs) == 1
    spec = task_specs[0]
    # Task content and provenance
    assert spec["title"] == "Run daily triage"
    assert spec["source_type"] == "instruction"
    assert spec["source_id"] == "daily-triage"
    # Audit fields
    assert spec["model"] == "test-model"
    assert spec["output_validated"] is True
    assert spec["review_required"] is False
    assert spec["prompt_hash"] is not None
    assert len(spec["prompt_hash"]) == 64
    assert result["reports"] == []
    # The trusted placeholder was rendered before the single LLM call.
    assert llm.calls == [("Open tasks: 3", "test-model")]
@pytest.mark.asyncio
async def test_evaluate_instructions_returns_report_payload(monkeypatch) -> None:
    """A report-only LLM response is surfaced under "reports" with audit fields."""
    llm_report = {
        "summary": "State Hub has open loose ends.",
        "recommendations": [{"candidate": "CUST-WP-0045", "action": "work-next"}],
    }
    llm = FakeLLMClient(json.dumps(llm_report))
    monkeypatch.setattr(activities, "get_llm_client", lambda: llm)

    instruction = {
        "id": "daily-triage-report",
        "trusted_fields": [],
        "model": "test-model",
        "prompt": "Run report.",
        "output_schema": "schemas/daily-triage-report.json",
        "review_required": False,
    }

    result = await activities.evaluate_instructions({
        "instructions": [instruction],
        "event": {},
        "context": {},
    })

    assert result["task_specs"] == []
    assert len(result["reports"]) == 1
    entry = result["reports"][0]
    assert entry["instruction_id"] == "daily-triage-report"
    assert entry["report"]["summary"] == "State Hub has open loose ends."
    assert entry["output_validated"] is True
    assert entry["prompt_hash"] is not None
@pytest.mark.asyncio
async def test_evaluate_instructions_without_llm_client_returns_no_tasks(monkeypatch) -> None:
    """A client whose complete() raises produces neither tasks nor reports."""

    class RaisingClient:
        def complete(self, prompt: str, model: str = "") -> str:  # noqa: ARG002
            raise RuntimeError("not configured")

    # The class itself is the zero-argument factory returning a fresh client.
    monkeypatch.setattr(activities, "get_llm_client", RaisingClient)

    instruction = {
        "id": "daily-triage",
        "trusted_fields": [],
        "model": "test-model",
        "prompt": "Run triage.",
        "output_schema": "schemas/daily-triage-report.json",
    }

    result = await activities.evaluate_instructions({
        "instructions": [instruction],
        "event": {},
        "context": {},
    })

    assert result == {"task_specs": [], "reports": []}