Wire instruction report execution

This commit is contained in:
2026-05-19 18:28:23 +02:00
parent 0e7084207e
commit 0dc342eb1b
7 changed files with 513 additions and 16 deletions

View File

@@ -0,0 +1,15 @@
{
"type": "object",
"required": ["summary", "recommendations"],
"properties": {
"summary": {
"type": "string"
},
"recommendations": {
"type": "array",
"items": {
"type": "object"
}
}
}
}

View File

@@ -25,6 +25,9 @@ from activity_core.issue_sink import get_issue_sink
from activity_core.orm import ActivityDefinition as ActivityDefinitionRow from activity_core.orm import ActivityDefinition as ActivityDefinitionRow
from activity_core.orm import ActivityRun, TaskInstance, TaskSpawnLog from activity_core.orm import ActivityRun, TaskInstance, TaskSpawnLog
from activity_core.rules import evaluate_condition from activity_core.rules import evaluate_condition
from activity_core.llm_client import get_llm_client
from activity_core.models import InstructionDef
from activity_core.rules.executor import execute_instruction_with_audit
_session_factory: async_sessionmaker[AsyncSession] | None = None _session_factory: async_sessionmaker[AsyncSession] | None = None
@@ -267,6 +270,75 @@ async def evaluate_rules(payload: dict) -> list[dict]:
return matched return matched
@activity.defn
async def evaluate_instructions(payload: dict) -> dict:
    """Evaluate instruction blocks and return task specs/reports with audit fields.

    Expected keys in payload:
      instructions  list[dict]  — InstructionDef serialised dicts
      event         dict        — EventEnvelope attributes (or empty for cron)
      context       dict        — context snapshot from resolve_context

    Missing keys and explicit ``None`` values are treated as empty.

    Returns:
        ``{"task_specs": [...], "reports": [...]}``. Every entry carries the
        audit fields ``condition``, ``prompt_hash``, ``model``,
        ``output_validated`` and ``review_required`` copied from the
        instruction result. Instruction dicts that fail
        ``InstructionDef.model_validate`` are logged and skipped.
    """
    # Function-scope import so this block does not depend on the module's
    # import list.
    import asyncio

    instructions = payload.get("instructions") or []
    event_attrs = payload.get("event") or {}
    context = payload.get("context") or {}
    llm_client = get_llm_client()

    class _DictObj:
        """Expose the top-level keys of a dict as instance attributes."""

        def __init__(self, d: dict) -> None:
            self.__dict__.update(d)

    class _Env:
        """Minimal event stand-in exposing the attributes as ``.attributes``."""

        def __init__(self, attrs: dict) -> None:
            self.attributes = _DictObj(attrs)

    event_obj = _Env(event_attrs)
    task_specs: list[dict] = []
    reports: list[dict] = []

    for raw_instruction in instructions:
        try:
            instruction = InstructionDef.model_validate(raw_instruction)
        except Exception as exc:
            activity.logger.warning("instruction definition invalid — %s", exc)
            continue

        # execute_instruction_with_audit is synchronous and calls the LLM
        # client's blocking complete(); run it in a worker thread so this
        # async activity does not stall the event loop while waiting.
        result = await asyncio.to_thread(
            execute_instruction_with_audit,
            instruction,
            event_obj,
            context,
            llm_client,
        )

        # Audit fields shared by the report entry and every task spec.
        audit = {
            "condition": result.condition_matched,
            "prompt_hash": result.prompt_hash,
            "model": result.model,
            "output_validated": result.output_validated,
            "review_required": result.review_required,
        }

        if result.report is not None:
            reports.append({
                "instruction_id": instruction.id,
                "report": result.report,
                **audit,
            })

        for spec in result.tasks:
            task_specs.append({
                "title": spec.title,
                "description": spec.description,
                "target_repo": spec.target_repo,
                "priority": spec.priority,
                "labels": spec.labels,
                "due_in_days": spec.due_in_days,
                "source_type": "instruction",
                "source_id": instruction.id,
                **audit,
            })

    return {"task_specs": task_specs, "reports": reports}
@activity.defn @activity.defn
async def emit_tasks(payload: dict) -> list[str]: async def emit_tasks(payload: dict) -> list[str]:
"""Emit TaskSpecs to IssueSink and write task_spawn_log rows. """Emit TaskSpecs to IssueSink and write task_spawn_log rows.
@@ -316,6 +388,10 @@ async def emit_tasks(payload: dict) -> list[str]:
triggering_event_id=triggering_event_id, triggering_event_id=triggering_event_id,
task_ref=ref.external_id, task_ref=ref.external_id,
condition_matched=spec_dict.get("condition"), condition_matched=spec_dict.get("condition"),
prompt_hash=spec_dict.get("prompt_hash"),
model=spec_dict.get("model"),
output_validated=spec_dict.get("output_validated"),
review_required=spec_dict.get("review_required"),
) )
session.add(log_row) session.add(log_row)
except Exception as exc: except Exception as exc:

View File

@@ -0,0 +1,57 @@
"""llm-connect adapter for instruction execution.
activity-core deliberately talks to llm-connect over its small HTTP surface
instead of importing provider-specific SDKs. This keeps the activity worker on
owned infrastructure while leaving provider selection, API keys, and model
routing behind the existing llm-connect boundary.
"""
from __future__ import annotations
import os
from typing import Any
import httpx
class DisabledLLMClient:
    """Stand-in LLM client for when no llm-connect endpoint is configured.

    It offers the same ``complete`` method as the real client but never
    produces a completion.
    """

    def complete(self, prompt: str, model: str = "") -> str:  # noqa: ARG002
        """Always raise ``RuntimeError``; both arguments are ignored."""
        reason = "LLM_CONNECT_URL is not configured"
        raise RuntimeError(reason)
class LLMConnectClient:
    """Small synchronous client for llm-connect server mode."""

    def __init__(self, base_url: str, timeout_seconds: float = 300.0) -> None:
        """Store the endpoint and timeout.

        Trailing slashes are stripped from ``base_url`` so the request path
        can be appended with a single ``/``.
        """
        self.base_url = base_url.rstrip("/")
        self.timeout_seconds = timeout_seconds

    def complete(self, prompt: str, model: str = "") -> str:
        """POST ``prompt`` to ``{base_url}/execute`` and return the completion text.

        The configured timeout is used as the HTTP timeout and is also sent
        to the server, truncated to an integer, inside ``config``.

        Raises:
            httpx.HTTPStatusError: on a non-success response status.
            ValueError: if the response body is not a JSON object with a
                string ``content`` field.
        """
        payload: dict[str, Any] = {
            "prompt": prompt,
            "config": {
                "model_name": model,
                "timeout_seconds": int(self.timeout_seconds),
            },
        }
        resp = httpx.post(
            f"{self.base_url}/execute",
            json=payload,
            timeout=self.timeout_seconds,
        )
        resp.raise_for_status()
        return self._extract_content(resp.json())

    @staticmethod
    def _extract_content(data: Any) -> str:
        """Return the string ``content`` field of a decoded response body."""
        # A JSON array/scalar body has no .get(); report it as a malformed
        # response instead of leaking an AttributeError.
        if not isinstance(data, dict):
            raise ValueError("llm-connect response is not a JSON object")
        content = data.get("content")
        if not isinstance(content, str):
            raise ValueError("llm-connect response missing string content")
        return content
def get_llm_client() -> DisabledLLMClient | LLMConnectClient:
    """Build the LLM client selected by the process environment.

    ``LLM_CONNECT_URL`` chooses the endpoint; when it is unset or blank a
    ``DisabledLLMClient`` is returned. ``LLM_CONNECT_TIMEOUT_SECONDS``
    optionally overrides the 300-second timeout. A blank, non-numeric,
    non-positive, NaN or infinite value falls back to that default instead
    of raising.
    """
    base_url = os.environ.get("LLM_CONNECT_URL", "").strip()
    if not base_url:
        return DisabledLLMClient()

    default_timeout = 300.0
    raw_timeout = os.environ.get("LLM_CONNECT_TIMEOUT_SECONDS", "").strip()
    try:
        timeout = float(raw_timeout) if raw_timeout else default_timeout
    except ValueError:
        timeout = default_timeout
    # Rejects zero/negative values as well as NaN and inf (the client later
    # converts the timeout with int(), which cannot represent those).
    if not 0 < timeout < float("inf"):
        timeout = default_timeout
    return LLMConnectClient(base_url, timeout)

View File

@@ -11,6 +11,8 @@ import hashlib
import json import json
import logging import logging
import re import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any from typing import Any
from activity_core.rules.evaluator import UnsafeExpression, evaluate_condition from activity_core.rules.evaluator import UnsafeExpression, evaluate_condition
@@ -26,6 +28,19 @@ class UntrustedFieldError(ValueError):
"""Raised when a prompt placeholder references a field not in trusted_fields.""" """Raised when a prompt placeholder references a field not in trusted_fields."""
@dataclass
class InstructionResult:
    """Result of running one instruction: its tasks, optional report, audit data."""

    # Task specs produced by the instruction; empty when nothing was emitted.
    tasks: list[TaskSpec]
    # Report payload parsed from the LLM output, if any.
    report: dict[str, Any] | None = None
    # Hash of the prompt; None when the run ended before one was recorded.
    prompt_hash: str | None = None
    # Model name taken from the instruction definition.
    model: str | None = None
    # True only when the LLM output passed validation.
    output_validated: bool = False
    # Copied from the instruction's review_required flag.
    review_required: bool = False
    # The instruction's condition expression, or None when it has none.
    condition_matched: str | None = None
def _resolve_path(obj: Any, path: str) -> Any: def _resolve_path(obj: Any, path: str) -> Any:
"""Walk a dot-separated path on obj or dict. Returns None if not found.""" """Walk a dot-separated path on obj or dict. Returns None if not found."""
parts = path.split(".") parts = path.split(".")
@@ -92,14 +107,24 @@ def execute_instruction(
4. Validate response against instr.output_schema (JSON Schema). Retry once. 4. Validate response against instr.output_schema (JSON Schema). Retry once.
5. Return list[TaskSpec]. 5. Return list[TaskSpec].
""" """
return execute_instruction_with_audit(instr, event, context, llm_client).tasks
def execute_instruction_with_audit(
instr: Any,
event: Any,
context: dict,
llm_client: Any,
) -> InstructionResult:
"""Evaluate an Instruction and return task specs plus audit metadata."""
try: try:
return _execute(instr, event, context, llm_client) return _execute(instr, event, context, llm_client)
except UntrustedFieldError as exc: except UntrustedFieldError as exc:
logger.warning("instruction %r rejected — %s", instr.id, exc) logger.warning("instruction %r rejected — %s", instr.id, exc)
return [] return _empty_result(instr)
except Exception as exc: except Exception as exc:
logger.warning("instruction %r failed — %s", instr.id, exc) logger.warning("instruction %r failed — %s", instr.id, exc)
return [] return _empty_result(instr)
def _execute( def _execute(
@@ -107,14 +132,14 @@ def _execute(
event: Any, event: Any,
context: dict, context: dict,
llm_client: Any, llm_client: Any,
) -> list[TaskSpec]: ) -> InstructionResult:
# Step 1 — pre-filter # Step 1 — pre-filter
try: try:
if instr.condition and not evaluate_condition(instr.condition, event, context): if instr.condition and not evaluate_condition(instr.condition, event, context):
return [] return _empty_result(instr)
except UnsafeExpression as exc: except UnsafeExpression as exc:
logger.warning("instruction %r condition is unsafe — %s", instr.id, exc) logger.warning("instruction %r condition is unsafe — %s", instr.id, exc)
return [] return _empty_result(instr)
# Step 2 — render prompt (raises UntrustedFieldError on policy violation) # Step 2 — render prompt (raises UntrustedFieldError on policy violation)
rendered = _render_prompt(instr.prompt, instr.trusted_fields, event, context) rendered = _render_prompt(instr.prompt, instr.trusted_fields, event, context)
@@ -124,34 +149,87 @@ def _execute(
raw_output = llm_client.complete(rendered, model=instr.model) raw_output = llm_client.complete(rendered, model=instr.model)
# Step 4 — validate and optionally retry # Step 4 — validate and optionally retry
task_specs, error = _validate_output(raw_output, instr) task_specs, report, error = _validate_output(raw_output, instr)
if error: if error:
retry_prompt = rendered + f"\n\nPrevious output was invalid: {error}\nPlease fix." retry_prompt = rendered + f"\n\nPrevious output was invalid: {error}\nPlease fix."
raw_output = llm_client.complete(retry_prompt, model=instr.model) raw_output = llm_client.complete(retry_prompt, model=instr.model)
task_specs, error = _validate_output(raw_output, instr) task_specs, report, error = _validate_output(raw_output, instr)
if error: if error:
logger.warning( logger.warning(
"instruction_output_error: instruction=%r, prompt_hash=%s, error=%s", "instruction_output_error: instruction=%r, prompt_hash=%s, error=%s",
instr.id, prompt_hash, error, instr.id, prompt_hash, error,
) )
return [] return _empty_result(instr, prompt_hash=prompt_hash)
return task_specs return InstructionResult(
tasks=task_specs,
report=report,
prompt_hash=prompt_hash,
model=instr.model,
output_validated=True,
review_required=bool(getattr(instr, "review_required", False)),
condition_matched=instr.condition or None,
)
def _validate_output(raw_output: Any, instr: Any) -> tuple[list[TaskSpec], str | None]: def _empty_result(instr: Any, prompt_hash: str | None = None) -> InstructionResult:
"""Parse raw LLM output into TaskSpec list. Returns (specs, error_message).""" return InstructionResult(
tasks=[],
prompt_hash=prompt_hash,
model=getattr(instr, "model", None),
output_validated=False,
review_required=bool(getattr(instr, "review_required", False)),
condition_matched=getattr(instr, "condition", "") or None,
)
def _validate_output(
raw_output: Any,
instr: Any,
) -> tuple[list[TaskSpec], dict[str, Any] | None, str | None]:
"""Parse raw LLM output into TaskSpecs and optional report payload.
Accepted shapes:
- list[task]
- single task dict with title/description/etc.
- {"tasks": [...], "report": {...}}
- report-only dict, such as {"summary": "...", "recommendations": [...]}
Returns (specs, report, error_message).
"""
try: try:
if isinstance(raw_output, str): if isinstance(raw_output, str):
data = json.loads(raw_output) data = json.loads(raw_output)
else: else:
data = raw_output data = raw_output
if not isinstance(data, list): schema_error = _validate_against_schema(data, getattr(instr, "output_schema", ""))
data = [data] if schema_error:
return [], None, schema_error
report: dict[str, Any] | None = None
task_items: list[Any]
if isinstance(data, dict) and ("tasks" in data or "report" in data):
maybe_report = data.get("report")
if maybe_report is not None and not isinstance(maybe_report, dict):
return [], None, "report must be a JSON object"
report = maybe_report
tasks = data.get("tasks", [])
if not isinstance(tasks, list):
return [], None, "tasks must be a JSON array"
task_items = tasks
elif isinstance(data, dict) and "title" not in data:
report = data
task_items = []
elif isinstance(data, list):
task_items = data
else:
task_items = [data]
specs = [] specs = []
for item in data: for item in task_items:
if not isinstance(item, dict):
return [], None, "each task must be a JSON object"
specs.append(TaskSpec( specs.append(TaskSpec(
title=item.get("title", ""), title=item.get("title", ""),
description=item.get("description", ""), description=item.get("description", ""),
@@ -162,6 +240,70 @@ def _validate_output(raw_output: Any, instr: Any) -> tuple[list[TaskSpec], str |
source_type="instruction", source_type="instruction",
source_id=instr.id, source_id=instr.id,
)) ))
return specs, None return specs, report, None
except (json.JSONDecodeError, AttributeError, KeyError, TypeError) as exc: except (json.JSONDecodeError, AttributeError, KeyError, TypeError) as exc:
return [], str(exc) return [], None, str(exc)
def _validate_against_schema(data: Any, schema_path: str) -> str | None:
if not schema_path:
return None
path = Path(schema_path)
if not path.exists():
return None
try:
schema = json.loads(path.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError) as exc:
return f"could not read output schema: {exc}"
return _validate_schema_node(data, schema, "$")
def _validate_schema_node(data: Any, schema: dict[str, Any], path: str) -> str | None:
    """Check ``data`` against one schema node and recurse into its children.

    Supports a small JSON Schema subset: ``type`` (a single name or a list
    of alternative names), ``required`` and ``properties`` for objects, and
    ``items`` for arrays.

    Args:
        data: The value to validate.
        schema: The schema node describing ``data``.
        path: Location of ``data`` in the document (``$`` is the root); used
            only to build error messages.

    Returns:
        ``None`` when ``data`` is accepted, otherwise a message describing
        the first violation found.
    """
    expected_type = schema.get("type")
    # "type" may be one name or a list of alternatives; normalise to a list.
    # A list-valued "type" used to bypass every check for the node.
    if isinstance(expected_type, str) and expected_type:
        allowed_types = [expected_type]
    elif isinstance(expected_type, list):
        allowed_types = [name for name in expected_type if isinstance(name, str)]
    else:
        allowed_types = []

    if allowed_types and not any(_matches_type(data, name) for name in allowed_types):
        return f"{path}: expected {' or '.join(allowed_types)}"

    if "object" in allowed_types and isinstance(data, dict):
        required = schema.get("required", [])
        if isinstance(required, list):
            for key in required:
                if isinstance(key, str) and key not in data:
                    return f"{path}: missing required property {key!r}"

        properties = schema.get("properties", {})
        if isinstance(properties, dict):
            for key, child_schema in properties.items():
                if key in data and isinstance(child_schema, dict):
                    error = _validate_schema_node(data[key], child_schema, f"{path}.{key}")
                    if error:
                        return error

    if "array" in allowed_types and isinstance(data, list):
        item_schema = schema.get("items")
        if isinstance(item_schema, dict):
            for index, item in enumerate(data):
                error = _validate_schema_node(item, item_schema, f"{path}[{index}]")
                if error:
                    return error

    return None
def _matches_type(data: Any, expected_type: str) -> bool:
    """Report whether ``data`` is an instance of the named JSON Schema type.

    Booleans are not accepted as ``integer`` or ``number``. Type names this
    function does not know are treated as matching.
    """
    # Numeric types first: bool is a subclass of int and must be excluded.
    if expected_type in ("integer", "number"):
        if isinstance(data, bool):
            return False
        numeric = int if expected_type == "integer" else (int, float)
        return isinstance(data, numeric)

    plain_types = (
        ("object", dict),
        ("array", list),
        ("string", str),
        ("boolean", bool),
    )
    for type_name, python_type in plain_types:
        if expected_type == type_name:
            return isinstance(data, python_type)

    if expected_type == "null":
        return data is None
    return True

View File

@@ -21,6 +21,7 @@ with workflow.unsafe.imports_passed_through():
from activity_core.activities import ( from activity_core.activities import (
emit_tasks, emit_tasks,
evaluate_rules, evaluate_rules,
evaluate_instructions,
load_activity_definition, load_activity_definition,
log_run, log_run,
persist_task_instance, persist_task_instance,
@@ -136,6 +137,19 @@ class RunActivityWorkflow:
"condition": rule.get("condition", ""), "condition": rule.get("condition", ""),
}) })
if defn.get("instructions"):
instruction_result: dict = await workflow.execute_activity(
evaluate_instructions,
{
"instructions": defn.get("instructions", []),
"event": event_attrs,
"context": context_snapshot,
},
start_to_close_timeout=_ACTIVITY_TIMEOUT,
retry_policy=_RETRY_POLICY,
)
task_spec_dicts.extend(instruction_result.get("task_specs", []))
# ── 4. Emit tasks via IssueSink ─────────────────────────────────────── # ── 4. Emit tasks via IssueSink ───────────────────────────────────────
if trigger_key == SCHEDULED_TRIGGER_KEY: if trigger_key == SCHEDULED_TRIGGER_KEY:
dedup_source = workflow.info().workflow_id dedup_source = workflow.info().workflow_id

View File

@@ -21,6 +21,7 @@ from activity_core.rules.executor import (
UntrustedFieldError, UntrustedFieldError,
_render_prompt, _render_prompt,
execute_instruction, execute_instruction,
execute_instruction_with_audit,
) )
@@ -201,6 +202,82 @@ def test_valid_llm_output_returns_task_spec():
assert result[0].source_type == "instruction" assert result[0].source_type == "instruction"
def test_execute_instruction_with_audit_returns_metadata():
    """A valid task list yields one task spec plus the audit metadata."""
    llm_output = json.dumps([{"title": "Run triage", "priority": "high"}])
    fake_llm = _CountingLLM([llm_output])
    instruction = _instr(
        id="daily-triage",
        condition="",
        prompt="Check State Hub.",
        trusted_fields=[],
        model="test-model",
        review_required=True,
    )

    outcome = execute_instruction_with_audit(instruction, _Event(), {}, fake_llm)

    # Task output.
    assert len(outcome.tasks) == 1
    assert outcome.tasks[0].source_id == "daily-triage"
    # Audit metadata.
    assert outcome.prompt_hash is not None
    assert len(outcome.prompt_hash) == 64
    assert outcome.model == "test-model"
    assert outcome.output_validated is True
    assert outcome.review_required is True
def test_execute_instruction_with_audit_accepts_report_payload(tmp_path):
    """A report-only payload that satisfies the output schema is returned as the report."""
    # The executor skips validation when the schema file does not exist, so a
    # path relative to the current working directory could let this test pass
    # without validating anything. Write the schema to a temp file and pass
    # its absolute path instead.
    schema_file = tmp_path / "daily-triage-report.json"
    schema_file.write_text(
        json.dumps({
            "type": "object",
            "required": ["summary", "recommendations"],
            "properties": {
                "summary": {"type": "string"},
                "recommendations": {"type": "array", "items": {"type": "object"}},
            },
        }),
        encoding="utf-8",
    )
    report_data = {
        "summary": "State Hub has loose ends.",
        "recommendations": [{"action": "revisit", "candidate": "CUST-WP-0045"}],
    }
    llm = _CountingLLM([json.dumps(report_data)])
    instr = _instr(
        id="daily-triage-report",
        prompt="Report.",
        trusted_fields=[],
        output_schema=str(schema_file),
    )

    result = execute_instruction_with_audit(instr, _Event(), {}, llm)

    assert result.tasks == []
    assert result.report == report_data
    assert result.output_validated is True
def test_execute_instruction_with_audit_rejects_invalid_report_schema(tmp_path):
    """A report missing a required property is rejected after one retry."""
    # The executor skips validation when the schema file does not exist, so a
    # path relative to the current working directory makes this test fail
    # whenever pytest is started from another directory. Use a schema written
    # to a temp file and pass its absolute path.
    schema_file = tmp_path / "daily-triage-report.json"
    schema_file.write_text(
        json.dumps({
            "type": "object",
            "required": ["summary", "recommendations"],
            "properties": {
                "summary": {"type": "string"},
                "recommendations": {"type": "array", "items": {"type": "object"}},
            },
        }),
        encoding="utf-8",
    )
    report_data = {"summary": "Missing recommendations."}
    # Same invalid output twice: first attempt plus the single retry.
    llm = _CountingLLM([json.dumps(report_data), json.dumps(report_data)])
    instr = _instr(
        id="daily-triage-report",
        prompt="Report.",
        trusted_fields=[],
        output_schema=str(schema_file),
    )

    result = execute_instruction_with_audit(instr, _Event(), {}, llm)

    assert result.tasks == []
    assert result.report is None
    assert result.output_validated is False
    assert llm.call_count == 2
def test_execute_instruction_with_audit_accepts_report_and_tasks_envelope():
    """An envelope with both ``report`` and ``tasks`` populates both result fields."""
    expected_report = {"summary": "Review needed."}
    llm_output = json.dumps({
        "report": {"summary": "Review needed."},
        "tasks": [{"title": "Inspect CUST-WP-0045"}],
    })
    fake_llm = _CountingLLM([llm_output])
    instruction = _instr(id="daily-triage-report", prompt="Report.", trusted_fields=[])

    outcome = execute_instruction_with_audit(instruction, _Event(), {}, fake_llm)

    assert outcome.report == expected_report
    assert len(outcome.tasks) == 1
    assert outcome.tasks[0].title == "Inspect CUST-WP-0045"
# ── Condition pre-filter ─────────────────────────────────────────────────────── # ── Condition pre-filter ───────────────────────────────────────────────────────
def test_condition_false_skips_llm(): def test_condition_false_skips_llm():

View File

@@ -0,0 +1,116 @@
from __future__ import annotations
import json
import pytest
from activity_core import activities
class FakeLLMClient:
    """Test double that returns one canned response and records every call."""

    def __init__(self, response: str) -> None:
        # (prompt, model) pairs in call order.
        self.calls: list[tuple[str, str]] = []
        self.response = response

    def complete(self, prompt: str, model: str = "") -> str:
        """Record the call and return the canned response."""
        call = (prompt, model)
        self.calls.append(call)
        return self.response
@pytest.mark.asyncio
async def test_evaluate_instructions_returns_task_specs_with_audit(monkeypatch) -> None:
    """Task output from the LLM is returned as task specs carrying audit fields."""
    llm_tasks = [
        {
            "title": "Run daily triage",
            "description": "Review State Hub loose ends.",
            "priority": "high",
            "labels": ["triage"],
        }
    ]
    llm = FakeLLMClient(json.dumps(llm_tasks))
    monkeypatch.setattr(activities, "get_llm_client", lambda: llm)

    instruction = {
        "id": "daily-triage",
        "trusted_fields": ["context.summary.open_tasks"],
        "model": "test-model",
        "prompt": "Open tasks: {context.summary.open_tasks}",
        "output_schema": "",
        "review_required": False,
    }
    payload = {
        "instructions": [instruction],
        "event": {},
        "context": {"summary": {"open_tasks": 3}},
    }

    result = await activities.evaluate_instructions(payload)

    task_specs = result["task_specs"]
    assert len(task_specs) == 1
    spec = task_specs[0]
    # Task content and provenance.
    assert spec["title"] == "Run daily triage"
    assert spec["source_type"] == "instruction"
    assert spec["source_id"] == "daily-triage"
    # Audit fields.
    assert spec["model"] == "test-model"
    assert spec["output_validated"] is True
    assert spec["review_required"] is False
    assert spec["prompt_hash"] is not None
    assert len(spec["prompt_hash"]) == 64
    assert result["reports"] == []
    # The prompt placeholder was rendered from the context snapshot.
    assert llm.calls == [("Open tasks: 3", "test-model")]
@pytest.mark.asyncio
async def test_evaluate_instructions_returns_report_payload(monkeypatch, tmp_path) -> None:
    """A report-only LLM output is returned under ``reports`` with audit fields."""
    # The executor skips validation when the schema file does not exist, so a
    # path relative to the current working directory could let this test pass
    # without validating anything. Write the schema to a temp file and pass
    # its absolute path instead.
    schema_file = tmp_path / "daily-triage-report.json"
    schema_file.write_text(
        json.dumps({
            "type": "object",
            "required": ["summary", "recommendations"],
            "properties": {
                "summary": {"type": "string"},
                "recommendations": {"type": "array", "items": {"type": "object"}},
            },
        }),
        encoding="utf-8",
    )
    llm = FakeLLMClient(json.dumps({
        "summary": "State Hub has open loose ends.",
        "recommendations": [{"candidate": "CUST-WP-0045", "action": "work-next"}],
    }))
    monkeypatch.setattr(activities, "get_llm_client", lambda: llm)

    result = await activities.evaluate_instructions({
        "instructions": [
            {
                "id": "daily-triage-report",
                "trusted_fields": [],
                "model": "test-model",
                "prompt": "Run report.",
                "output_schema": str(schema_file),
                "review_required": False,
            }
        ],
        "event": {},
        "context": {},
    })

    assert result["task_specs"] == []
    assert len(result["reports"]) == 1
    report = result["reports"][0]
    assert report["instruction_id"] == "daily-triage-report"
    assert report["report"]["summary"] == "State Hub has open loose ends."
    assert report["output_validated"] is True
    assert report["prompt_hash"] is not None
@pytest.mark.asyncio
async def test_evaluate_instructions_without_llm_client_returns_no_tasks(monkeypatch) -> None:
    """An LLM client that raises produces empty task and report lists."""

    class RaisingClient:
        def complete(self, prompt: str, model: str = "") -> str:  # noqa: ARG002
            raise RuntimeError("not configured")

    monkeypatch.setattr(activities, "get_llm_client", lambda: RaisingClient())

    instruction = {
        "id": "daily-triage",
        "trusted_fields": [],
        "model": "test-model",
        "prompt": "Run triage.",
        "output_schema": "schemas/daily-triage-report.json",
    }
    payload = {"instructions": [instruction], "event": {}, "context": {}}

    result = await activities.evaluate_instructions(payload)

    assert result == {"task_specs": [], "reports": []}