feat(event-bridge): WP-0003a — domain model, rules module, event type registry

Implements phases 7–8 of the Event Bridge architecture (custodian-WP-0003a).

Domain model (T34, T40):
- Added RuleDef, InstructionDef, ActionDef to models.py
- Updated ActivityDefinition with rules/instructions fields (task_templates deprecated)
- Formalized EventEnvelope: id, type, version, timestamp, publisher, attributes
- Added from_nats_message() and from_webhook_payload() classmethods

Rules module (T35, T36, T37):
- src/activity_core/rules/ skeleton with boundary enforcement
- evaluate_condition() — sandboxed AST walker, whitelisted nodes only, never exec()
- execute_instruction() — LLM task generation with trusted_fields injection guard
- tests/rules/test_boundary.py verifies no cross-boundary imports

Infrastructure (T38, T39):
- Alembic migrations 0004 (task_spawn_log) and 0005 (event_types)
- IssueSink ABC + IssueCoreRestSink (REST) + NullSink (testing)
- TaskSpawnLog and EventType ORM models

Event type registry (T41, T42, T43):
- event_type_registry.py: file scanner, parser, DB sync, in-process lookup
- ACTIVITY_CURATOR_GATE env var (disabled|required) + approve endpoint
- Three org event type definitions: org.repo.registered, org.workstream.completed,
  org.activity.run.completed

All 10 tests pass. Boundary test confirms rules/ isolation.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-14 22:01:15 +02:00
parent ee81adb2fa
commit c3a256509b
22 changed files with 1281 additions and 137 deletions

View File

@@ -0,0 +1,11 @@
"""
Rules module — sandboxed rule evaluation and LLM-driven instruction execution.
Boundary: nothing in this package may import from temporalio, sqlalchemy,
fastapi, or any activity_core.* module outside rules/.
"""
from activity_core.rules.evaluator import evaluate_condition
from activity_core.rules.executor import execute_instruction
__all__ = ["evaluate_condition", "execute_instruction"]

View File

@@ -0,0 +1,181 @@
"""
Rule condition evaluator — sandboxed AST walker.
Never calls exec() or eval(). Raises UnsafeExpression at parse time for any
AST node not in the whitelist.
"""
from __future__ import annotations
import ast
import operator
from typing import Any
class UnsafeExpression(ValueError):
    """Signal that a rule condition cannot be evaluated safely.

    Used by this module for syntax errors, AST constructs outside the
    whitelist, disallowed function calls and unsupported comparison
    operators. It subclasses ValueError, so handlers for ValueError catch it
    as well.
    """
# Whitelist of AST node classes a condition may contain. _validate checks
# membership by exact type, so anything not listed here is rejected.
_ALLOWED_NODE_TYPES = frozenset({
    # Root of a tree produced by ast.parse(..., mode="eval").
    ast.Expression,
    # Boolean logic.
    ast.BoolOp, ast.And, ast.Or,
    ast.UnaryOp, ast.Not,
    # Comparisons and membership tests.
    ast.Compare,
    ast.Eq, ast.NotEq, ast.Lt, ast.LtE, ast.Gt, ast.GtE, ast.In, ast.NotIn,
    # Operands.
    ast.Name, ast.Attribute, ast.Constant,
    ast.List, ast.Tuple,
    ast.Call,  # only len() — enforced in _check_call
    # Expression context. Conditions only ever read values, so the Store and
    # Del contexts (assignment / deletion targets) are deliberately absent.
    ast.Load,
})
# Dispatch table: comparison-operator AST node class -> callable invoked as
# fn(left, right).
_COMPARE_OPS = {
    # Equality and ordering map straight onto the operator module.
    ast.Eq: operator.eq,
    ast.NotEq: operator.ne,
    ast.Lt: operator.lt,
    ast.LtE: operator.le,
    ast.Gt: operator.gt,
    ast.GtE: operator.ge,
    # Membership: operator.contains takes (container, item), the reverse of
    # the (left, right) order used here, so the arguments are swapped.
    ast.In: lambda item, container: operator.contains(container, item),
    ast.NotIn: lambda item, container: not operator.contains(container, item),
}
def _check_call(node: ast.Call) -> None:
    """Reject every call except ``len(<one positional argument>)``.

    Args:
        node: The call node to inspect.

    Raises:
        UnsafeExpression: If the callee is not the bare name ``len``, if
            keyword arguments are passed, or if the call does not have
            exactly one positional argument.
    """
    callee = node.func
    if not (isinstance(callee, ast.Name) and callee.id == "len"):
        raise UnsafeExpression(
            f"function call not allowed: {ast.unparse(node)!r}"
        )
    if node.keywords:
        raise UnsafeExpression("keyword arguments not allowed in len() call")
    # The evaluator reads node.args[0] unconditionally: ``len()`` would crash
    # it with IndexError and surplus arguments would be silently ignored.
    if len(node.args) != 1:
        raise UnsafeExpression("len() takes exactly one argument")
def _validate(node: ast.AST) -> None:
    """Check *node* and every descendant against the whitelist.

    Nodes are visited depth-first, parent before children, so the first
    offending construct in that order is the one reported.

    Raises:
        UnsafeExpression: If a node's exact type is not in
            ``_ALLOWED_NODE_TYPES``, or if a call is rejected by
            ``_check_call``.
    """
    kind = type(node)
    if kind in _ALLOWED_NODE_TYPES:
        if isinstance(node, ast.Call):
            _check_call(node)
        for descendant in ast.iter_child_nodes(node):
            _validate(descendant)
        return
    raise UnsafeExpression(
        f"expression contains forbidden construct: {kind.__name__}"
    )
def _resolve(obj: Any, path: list[str]) -> Any:
    """Follow *path* through nested dicts and objects, starting at *obj*.

    Dicts are read with ``.get(part)``; anything else with ``getattr``.

    Args:
        obj: Starting object or dict.
        path: Attribute / key names to follow, outermost first. An empty
            path returns *obj* itself.

    Returns:
        The value at the end of the path, or None as soon as a step is
        missing, is None, or is a double-underscore attribute of a non-dict
        object.
    """
    current = obj
    for part in path:
        if current is None:
            return None
        if isinstance(current, dict):
            # Plain key lookup: any key name is harmless here.
            current = current.get(part)
        elif part.startswith("__"):
            # Dunder attributes (__class__, __init__.__globals__, ...) expose
            # interpreter internals to the rule author. Treat them as missing
            # so a condition cannot walk out of the event/context data.
            return None
        else:
            current = getattr(current, part, None)
    return current
def _eval_node(node: ast.AST, event: Any, context: dict) -> Any:
    """Recursively evaluate an AST node that has already passed ``_validate``.

    Args:
        node: Node to evaluate.
        event: Value bound to the name ``event`` (object or dict).
        context: Value bound to the name ``context``.

    Returns:
        The value of the sub-expression. ``and`` / ``or`` short-circuit and
        yield the deciding operand, as in Python, so they compose correctly
        with comparisons and ``len()``. Unknown names, unknown attribute
        roots and missing attributes evaluate to None; a comparison that
        raises TypeError evaluates to False; ``len()`` of an unsized value
        evaluates to 0.

    Raises:
        UnsafeExpression: For a node type or comparison operator this
            evaluator does not implement, or a call that does not have
            exactly one positional argument.
    """
    if isinstance(node, ast.Expression):
        return _eval_node(node.body, event, context)

    if isinstance(node, ast.Constant):
        return node.value

    if isinstance(node, ast.Name):
        # None / True / False never arrive here: the parser emits them as
        # ast.Constant. Only the two root names carry a value.
        if node.id == "event":
            return event
        if node.id == "context":
            return context
        return None

    if isinstance(node, ast.Attribute):
        # Unwind a.b.c into its root node plus the path ["b", "c"].
        parts: list[str] = []
        current: ast.AST = node
        while isinstance(current, ast.Attribute):
            parts.append(current.attr)
            current = current.value
        parts.reverse()
        if isinstance(current, ast.Name):
            if current.id == "event":
                return _resolve(event, parts)
            if current.id == "context":
                return _resolve(context, parts)
        return None

    if isinstance(node, ast.BoolOp):
        # ``or`` stops at the first truthy operand, ``and`` at the first
        # falsy one; if none decides, the last operand is the result.
        stop_on = isinstance(node.op, ast.Or)
        value = None
        for operand in node.values:
            value = _eval_node(operand, event, context)
            if bool(value) is stop_on:
                return value
        return value

    if isinstance(node, ast.UnaryOp) and isinstance(node.op, ast.Not):
        return not _eval_node(node.operand, event, context)

    if isinstance(node, ast.Compare):
        # Chained comparison (a < b < c): each link must hold, and the right
        # operand of one link becomes the left operand of the next.
        left = _eval_node(node.left, event, context)
        for op_node, comparator in zip(node.ops, node.comparators):
            right = _eval_node(comparator, event, context)
            op_fn = _COMPARE_OPS.get(type(op_node))
            if op_fn is None:
                raise UnsafeExpression(f"unsupported comparison: {type(op_node).__name__}")
            try:
                if not op_fn(left, right):
                    return False
            except TypeError:
                # Incomparable operands (e.g. None < 3) count as "no match".
                return False
            left = right
        return True

    if isinstance(node, ast.Call):
        # Only len() gets past validation. Guard the arity here as well so a
        # malformed call fails closed instead of raising IndexError.
        if len(node.args) != 1:
            raise UnsafeExpression("len() takes exactly one argument")
        arg = _eval_node(node.args[0], event, context)
        try:
            return len(arg)
        except TypeError:
            return 0

    if isinstance(node, ast.List):
        return [_eval_node(elt, event, context) for elt in node.elts]

    if isinstance(node, ast.Tuple):
        return tuple(_eval_node(elt, event, context) for elt in node.elts)

    raise UnsafeExpression(f"cannot evaluate node type: {type(node).__name__}")
def evaluate_condition(expr: str, event: Any, context: dict) -> bool:
    """Evaluate a rule condition expression safely.

    The expression is parsed with ``ast.parse``, checked against the node
    whitelist, then interpreted by walking the tree. ``exec()`` and
    ``eval()`` are never called.

    Args:
        expr: Condition source. Empty, whitespace-only or None means the
            rule is unconditional.
        event: Value exposed to the expression as ``event``.
        context: Value exposed to the expression as ``context``.

    Returns:
        True for an empty expression, otherwise the truthiness of the
        evaluated expression.

    Raises:
        UnsafeExpression: If the source cannot be parsed, contains a
            non-whitelisted construct, or is nested too deeply to process.
    """
    source = expr.strip() if expr else ""
    if not source:
        return True

    try:
        tree = ast.parse(source, mode="eval")
    except (SyntaxError, ValueError, RecursionError) as exc:
        # Besides SyntaxError, ast.parse reports some malformed sources
        # (e.g. embedded null bytes on some Python versions) as ValueError
        # and over-deep nesting as RecursionError. All mean "unusable".
        raise UnsafeExpression(f"syntax error in condition: {exc}") from exc

    try:
        _validate(tree)
        return bool(_eval_node(tree, event, context))
    except RecursionError as exc:
        # Both walkers recurse once per nesting level; surface exhaustion as
        # the documented error type rather than a bare RecursionError.
        raise UnsafeExpression("condition is nested too deeply") from exc

View File

@@ -0,0 +1,167 @@
"""
Instruction executor — LLM-driven task generation with prompt injection protection.
Boundary: no imports from temporalio, sqlalchemy, fastapi, or any
activity_core.* module outside rules/.
"""
from __future__ import annotations
import hashlib
import json
import logging
import re
from typing import Any
from activity_core.rules.evaluator import UnsafeExpression, evaluate_condition
from activity_core.rules.models import TaskSpec
logger = logging.getLogger(__name__)
# Matches {field.path} placeholders in prompt templates: an opening brace, a
# letter or underscore, then any run of letters, digits, underscores or dots,
# and a closing brace. Group 1 captures the path without the braces. Brace
# groups containing any other character (spaces, quotes, colons, ...) do not
# match and are left untouched by substitution.
_PLACEHOLDER_RE = re.compile(r"\{([a-zA-Z_][a-zA-Z0-9_.]*)\}")
class UntrustedFieldError(ValueError):
    """Signal that a prompt placeholder violates the injection policy.

    Raised by prompt rendering when a placeholder is not listed in
    trusted_fields, has a root other than ``event`` / ``context``, or
    resolves to a value that may not be injected into a prompt.
    """
def _resolve_path(obj: Any, path: str) -> Any:
    """Follow a dot-separated *path* through nested dicts and objects.

    Each segment is looked up with ``.get()`` on dicts and ``getattr()`` on
    anything else. The walk stops at the first None, which also makes a
    missing key or attribute resolve to None.
    """
    node = obj
    for segment in path.split("."):
        if node is None:
            break
        node = node.get(segment) if isinstance(node, dict) else getattr(node, segment, None)
    return node
def _render_prompt(prompt: str, trusted_fields: list[str], event: Any, context: dict) -> str:
    """Substitute ``{field.path}`` placeholders in *prompt*.

    Args:
        prompt: Template text containing ``{event.x}`` / ``{context.y}``
            placeholders.
        trusted_fields: Allowlist of full placeholder paths (including the
            ``event.`` / ``context.`` prefix) that may be injected.
        event: Source for ``event.*`` placeholders.
        context: Source for ``context.*`` placeholders.

    Returns:
        The prompt with every placeholder replaced by ``str(value)``; a
        value of None renders as the empty string.

    Raises:
        UntrustedFieldError: If a placeholder is not in *trusted_fields*,
            has a root other than ``event`` or ``context``, or resolves to
            anything other than str, int, float, bool or None — even when
            the field is allowlisted.
    """
    def substitute(match: re.Match) -> str:
        field_path = match.group(1)
        if field_path not in trusted_fields:
            raise UntrustedFieldError(
                f"prompt references untrusted field: {field_path!r}"
            )

        # Split "event.a.b" into the root name and the remaining path.
        root, _, tail = field_path.partition(".")
        if root == "event":
            source = event
        elif root == "context":
            source = context
        else:
            raise UntrustedFieldError(f"unknown root in field path: {root!r}")
        value = _resolve_path(source, tail) if tail else source

        if value is None:
            return ""
        # Strict scalar whitelist. The previous check tested against
        # ``object.__class__`` — which is ``type`` — so tuples, sets and
        # arbitrary object instances slipped through and were stringified
        # into the prompt. Values such as datetimes must be converted to str
        # before they reach the event/context.
        if not isinstance(value, (str, int, float, bool)):
            raise UntrustedFieldError(
                f"field {field_path!r} resolves to a non-scalar type and cannot be "
                "injected into a prompt"
            )
        return str(value)

    return _PLACEHOLDER_RE.sub(substitute, prompt)
def execute_instruction(
    instr: Any,
    event: Any,
    context: dict,
    llm_client: Any,
) -> list[TaskSpec]:
    """Evaluate an Instruction. Returns [] on any failure; never raises.

    Steps (carried out by ``_execute``):
      1. Pre-filter: evaluate instr.condition — skip if false or unsafe.
      2. Render the prompt, enforcing the trusted_fields allowlist.
      3. Call llm_client.complete() with the rendered prompt.
      4. Parse the response into TaskSpec objects, retrying once with the
         parse error appended to the prompt. NOTE(review): instr.output_schema
         is not consulted by the code in this module — confirm whether JSON
         Schema validation is expected to happen elsewhere.
      5. Return list[TaskSpec].

    Args:
        instr: Instruction definition; read for id, condition, prompt,
            trusted_fields and model.
        event: Triggering event, exposed to conditions and prompts.
        context: Extra values exposed to conditions and prompts.
        llm_client: Object providing ``complete(prompt, model=...)``.

    Returns:
        The generated task specs, or [] if the instruction was skipped,
        rejected or failed.
    """
    # Resolve the id defensively: the handlers below must not raise even
    # when instr is None or lacks an ``id`` attribute.
    instr_id = getattr(instr, "id", None)
    try:
        return _execute(instr, event, context, llm_client)
    except UntrustedFieldError as exc:
        # Policy violation in the prompt template: expected, no traceback.
        logger.warning("instruction %r rejected — %s", instr_id, exc)
        return []
    except Exception as exc:  # deliberate catch-all: best-effort boundary
        # Unexpected failure: keep the traceback so it can be diagnosed.
        logger.warning("instruction %r failed — %s", instr_id, exc, exc_info=True)
        return []
def _execute(
    instr: Any,
    event: Any,
    context: dict,
    llm_client: Any,
) -> list[TaskSpec]:
    """Run one instruction: pre-filter, render, call the LLM, parse.

    Returns [] when the condition is false or unsafe, or when the LLM output
    is still invalid after one retry. Other errors (including
    UntrustedFieldError from prompt rendering) propagate to the caller.
    """
    # Step 1 — pre-filter: an instruction without a condition always runs.
    condition = instr.condition
    if condition:
        try:
            matched = evaluate_condition(condition, event, context)
        except UnsafeExpression as exc:
            logger.warning("instruction %r condition is unsafe — %s", instr.id, exc)
            return []
        if not matched:
            return []

    # Step 2 — render prompt (raises UntrustedFieldError on policy violation).
    # The hash identifies the prompt in the error log below.
    base_prompt = _render_prompt(instr.prompt, instr.trusted_fields, event, context)
    prompt_hash = hashlib.sha256(base_prompt.encode()).hexdigest()

    # Steps 3 and 4 — call the LLM and parse its output: one initial attempt
    # plus one retry that feeds the parse error back to the model.
    attempt_prompt = base_prompt
    failure = None
    for _ in range(2):
        reply = llm_client.complete(attempt_prompt, model=instr.model)
        task_specs, failure = _validate_output(reply, instr)
        if not failure:
            return task_specs
        attempt_prompt = base_prompt + f"\n\nPrevious output was invalid: {failure}\nPlease fix."

    logger.warning(
        "instruction_output_error: instruction=%r, prompt_hash=%s, error=%s",
        instr.id, prompt_hash, failure,
    )
    return []
def _validate_output(raw_output: Any, instr: Any) -> tuple[list[TaskSpec], str | None]:
    """Parse raw LLM output into a list of TaskSpec.

    Args:
        raw_output: JSON text, or an already-decoded object / list of
            objects. A single object is treated as a one-element list.
        instr: Instruction that produced the output; its ``id`` becomes each
            spec's ``source_id``.

    Returns:
        ``(specs, None)`` on success, or ``([], message)`` when the output is
        not valid JSON, an item is not a JSON object, or an item lacks a
        non-empty string ``title``. A non-None message lets the caller retry.
    """
    try:
        data = json.loads(raw_output) if isinstance(raw_output, str) else raw_output
        items = data if isinstance(data, list) else [data]

        specs: list[TaskSpec] = []
        for index, item in enumerate(items):
            # title is the only required TaskSpec field; defaulting it to ""
            # would let an untitled task pass as valid and skip the retry.
            title = item.get("title")
            if not isinstance(title, str) or not title.strip():
                return [], f"item {index}: 'title' must be a non-empty string"
            specs.append(TaskSpec(
                title=title,
                # ``or`` also covers an explicit JSON null, which .get()'s
                # default alone would pass through as None.
                description=item.get("description") or "",
                target_repo=item.get("target_repo"),
                priority=item.get("priority") or "medium",
                labels=item.get("labels") or [],
                due_in_days=item.get("due_in_days"),
                source_type="instruction",
                source_id=instr.id,
            ))
        return specs, None
    except (json.JSONDecodeError, AttributeError, KeyError, TypeError) as exc:
        # AttributeError covers items that are not dicts (no .get()).
        return [], str(exc)

View File

@@ -0,0 +1,35 @@
"""
Domain models for the rules module.
Boundary: no imports from temporalio, sqlalchemy, fastapi, or any
activity_core.* module outside rules/.
"""
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass
class TaskSpec:
    """Description of one task to be emitted via IssueSink.

    Produced by RuleEvaluator or InstructionExecutor. Only ``title`` is
    required; every other field has a default.
    """

    # What the task is.
    title: str
    description: str = ""
    # Where it goes and how it is classified.
    target_repo: str | None = None
    priority: str = "medium"
    labels: list[str] = field(default_factory=list)  # fresh list per instance
    due_in_days: int | None = None
    # Provenance.
    source_type: str = "rule"  # "rule" | "instruction"
    source_id: str = ""
    triggering_event_id: str = ""
    activity_definition_id: str = ""
@dataclass
class TaskRef:
    """Pointer to a task that was created in an external backend (issue-core)."""

    external_id: str  # required; the other fields are optional
    backend_url: str | None = None
    backend: str = ""