Implement ops inventory probe evidence slice

This commit is contained in:
2026-06-05 23:16:40 +02:00
parent ee1f805c0b
commit 41d3e75a88
17 changed files with 1521 additions and 14 deletions

View File

@@ -26,6 +26,7 @@ from activity_core.orm import ActivityDefinition as ActivityDefinitionRow
from activity_core.orm import ActivityRun, TaskInstance, TaskSpawnLog
from activity_core.llm_client import get_llm_client
from activity_core.models import InstructionDef
from activity_core.ops_evidence_sinks import persist_ops_inventory_evidence
from activity_core.report_sinks import persist_reports
from activity_core.rules.actions import expand_rule_actions
from activity_core.rules.executor import execute_instruction_with_audit
@@ -356,6 +357,12 @@ async def persist_instruction_reports(payload: dict) -> list[dict]:
return persist_reports(payload)
@activity.defn
async def persist_ops_evidence(payload: dict) -> list[dict]:
"""Persist compact deterministic ops inventory evidence."""
return persist_ops_inventory_evidence(payload)
@activity.defn
async def emit_tasks(payload: dict) -> list[str]:
"""Emit TaskSpecs to IssueSink and write task_spawn_log rows.

View File

@@ -1 +1 @@
from activity_core.context_resolvers import repo_scoping, state_hub # noqa: F401
from activity_core.context_resolvers import ops_inventory, repo_scoping, state_hub # noqa: F401

View File

@@ -0,0 +1,322 @@
"""Ops service inventory probe context adapter.
Registered as source type ``ops-inventory``.
The resolver reads the Custodian's non-secret service inventory and performs
bounded HTTP/HTTPS checks for declared endpoints. It deliberately records only
compact probe metadata: stable inventory ids, sanitized endpoint URLs, status
codes, boolean match results, and summary counts.
"""
from __future__ import annotations
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from urllib.parse import urlsplit, urlunsplit
import httpx
import yaml
from activity_core.context_resolvers.base import CONTEXT_RESOLVER_REGISTRY, ContextResolver
_DEFAULT_INVENTORY_PATH = "/home/worsch/the-custodian/ops/service-inventory.yml"
_DEFAULT_TIMEOUT_SECONDS = 10.0
_SUPPORTED_ENDPOINT_TYPES = {"http", "https"}
class OpsInventoryContextResolver(ContextResolver):
"""Resolve lightweight ops inventory probes from a non-secret YAML file."""
def resolve(self, query: str, event: Any, params: dict[str, Any]) -> dict[str, Any]:
if query != "probe_services":
return {}
return _probe_services(params)
CONTEXT_RESOLVER_REGISTRY["ops-inventory"] = OpsInventoryContextResolver
def _probe_services(params: dict[str, Any]) -> dict[str, Any]:
inventory_path = Path(
str(
params.get("inventory_path")
or os.environ.get("OPS_INVENTORY_PATH")
or _DEFAULT_INVENTORY_PATH
)
)
timeout_seconds = float(params.get("timeout_seconds", _DEFAULT_TIMEOUT_SECONDS))
allow_network = _bool_param(params.get("allow_network", True))
required = _bool_param(params.get("required", False))
include_kinds = _include_kinds(params.get("include_kinds"))
if not inventory_path.exists():
if required:
raise FileNotFoundError(f"ops inventory not found: {inventory_path}")
return _empty_result(
inventory_path,
reason="inventory_not_found",
status="skipped",
skipped=1,
)
inventory = _load_inventory(inventory_path)
raw_services = inventory.get("services")
if not isinstance(raw_services, list):
if required:
raise ValueError("ops inventory missing services list")
return _empty_result(
inventory_path,
reason="invalid_inventory",
status="skipped",
skipped=1,
)
result = _empty_result(inventory_path)
for raw_service in raw_services:
if not isinstance(raw_service, dict):
continue
service = _service_summary(raw_service)
result["services"].append(service)
for endpoint in _endpoint_entries(
raw_service,
include_kinds,
allow_network,
timeout_seconds,
):
result["endpoints"].append(endpoint)
_increment_summary(result["summary"], endpoint["status"])
for access_path in _access_path_entries(raw_service):
result["access_paths"].append(access_path)
_increment_summary(result["summary"], access_path["status"])
return result
def _load_inventory(path: Path) -> dict[str, Any]:
with path.open("r", encoding="utf-8") as handle:
payload = yaml.safe_load(handle) or {}
if not isinstance(payload, dict):
raise ValueError("ops inventory root must be a mapping")
return payload
def _empty_result(
inventory_path: Path,
*,
reason: str | None = None,
status: str | None = None,
skipped: int = 0,
) -> dict[str, Any]:
summary: dict[str, int] = {
"ok": 0,
"degraded": 0,
"down": 0,
"skipped": skipped,
}
result: dict[str, Any] = {
"services": [],
"endpoints": [],
"access_paths": [],
"summary": summary,
"generated_at": datetime.now(timezone.utc).isoformat(),
"inventory_path": str(inventory_path),
}
if reason is not None:
result["reason"] = reason
if status is not None:
result["status"] = status
return result
def _service_summary(service: dict[str, Any]) -> dict[str, Any]:
endpoints = service.get("endpoints") if isinstance(service.get("endpoints"), list) else []
access_paths = (
service.get("access_paths") if isinstance(service.get("access_paths"), list) else []
)
owner_repos = service.get("owner_repos")
return {
"service_id": str(service.get("id") or ""),
"name": str(service.get("name") or service.get("id") or ""),
"kind": str(service.get("kind") or ""),
"environment": str(service.get("environment") or ""),
"lifecycle_state": str(service.get("lifecycle_state") or ""),
"declared_health_status": str(service.get("health_status") or ""),
"owner_repos": owner_repos if isinstance(owner_repos, list) else [],
"endpoint_count": len(endpoints),
"access_path_count": len(access_paths),
}
def _endpoint_entries(
service: dict[str, Any],
include_kinds: set[str],
allow_network: bool,
timeout_seconds: float,
) -> list[dict[str, Any]]:
service_id = str(service.get("id") or "")
service_name = str(service.get("name") or service_id)
raw_endpoints = service.get("endpoints")
if not isinstance(raw_endpoints, list):
return []
entries: list[dict[str, Any]] = []
for raw_endpoint in raw_endpoints:
if not isinstance(raw_endpoint, dict):
continue
endpoint_type = str(raw_endpoint.get("type") or "").lower()
entry = _endpoint_base(service_id, service_name, raw_endpoint, endpoint_type)
if endpoint_type not in include_kinds:
entry.update({"status": "skipped", "reason": "kind_not_included"})
entries.append(entry)
continue
if endpoint_type not in _SUPPORTED_ENDPOINT_TYPES:
entry.update({"status": "skipped", "reason": "unsupported_endpoint_type"})
entries.append(entry)
continue
if not raw_endpoint.get("url"):
entry.update({"status": "skipped", "reason": "missing_url"})
entries.append(entry)
continue
if not allow_network:
entry.update({"status": "skipped", "reason": "network_disabled"})
entries.append(entry)
continue
entry.update(_probe_http_endpoint(raw_endpoint, timeout_seconds))
entries.append(entry)
return entries
def _endpoint_base(
service_id: str,
service_name: str,
endpoint: dict[str, Any],
endpoint_type: str,
) -> dict[str, Any]:
expected_status = endpoint.get("expected_status")
return {
"service_id": service_id,
"service_name": service_name,
"endpoint_id": str(endpoint.get("id") or ""),
"endpoint_type": endpoint_type,
"url": _sanitize_url(str(endpoint.get("url") or "")),
"expected_status": expected_status if isinstance(expected_status, int) else None,
"expected_signal_present": bool(endpoint.get("expected_signal")),
"widget_ref": str(endpoint.get("widget_ref") or ""),
"status": "skipped",
"reason": None,
"status_code": None,
"matched_expected_status": None,
"matched_expected_signal": None,
}
def _probe_http_endpoint(
endpoint: dict[str, Any],
timeout_seconds: float,
) -> dict[str, Any]:
url = str(endpoint.get("url") or "")
expected_status = endpoint.get("expected_status")
expected_signal = endpoint.get("expected_signal")
try:
response = httpx.get(url, timeout=timeout_seconds, follow_redirects=False)
except httpx.HTTPError as exc:
return {
"status": "down",
"reason": type(exc).__name__,
"status_code": None,
"matched_expected_status": False if isinstance(expected_status, int) else None,
"matched_expected_signal": False if expected_signal else None,
}
status_match = (
response.status_code == expected_status
if isinstance(expected_status, int)
else True
)
signal_match = (
str(expected_signal) in response.text
if isinstance(expected_signal, str) and expected_signal
else True
)
status = "ok" if status_match and signal_match else "degraded"
reason = None
if not status_match:
reason = "expected_status_mismatch"
elif not signal_match:
reason = "expected_signal_missing"
return {
"status": status,
"reason": reason,
"status_code": response.status_code,
"matched_expected_status": status_match,
"matched_expected_signal": signal_match,
}
def _access_path_entries(service: dict[str, Any]) -> list[dict[str, Any]]:
service_id = str(service.get("id") or "")
service_name = str(service.get("name") or service_id)
raw_paths = service.get("access_paths")
if not isinstance(raw_paths, list):
return []
entries: list[dict[str, Any]] = []
for index, raw_path in enumerate(raw_paths, start=1):
if not isinstance(raw_path, dict):
continue
path_type = str(raw_path.get("type") or "").lower()
entries.append({
"service_id": service_id,
"service_name": service_name,
"access_path_id": str(raw_path.get("id") or f"{service_id}-access-{index}"),
"access_path_type": path_type,
"declared_status": str(raw_path.get("status") or ""),
"status": "skipped",
"reason": "unsupported_access_path_type",
})
return entries
def _include_kinds(raw: Any) -> set[str]:
if raw is None:
return set(_SUPPORTED_ENDPOINT_TYPES)
if isinstance(raw, str):
return {part.strip().lower() for part in raw.split(",") if part.strip()}
if isinstance(raw, list):
return {str(part).strip().lower() for part in raw if str(part).strip()}
return set(_SUPPORTED_ENDPOINT_TYPES)
def _bool_param(raw: Any) -> bool:
if isinstance(raw, bool):
return raw
if isinstance(raw, str):
return raw.strip().lower() not in {"0", "false", "no", "off"}
return bool(raw)
def _increment_summary(summary: dict[str, int], status: str) -> None:
if status not in summary:
status = "skipped"
summary[status] += 1
def _sanitize_url(raw_url: str) -> str:
if not raw_url:
return ""
parsed = urlsplit(raw_url)
if not parsed.scheme or not parsed.netloc:
return raw_url.split("?", 1)[0].split("#", 1)[0]
hostname = parsed.hostname or ""
if parsed.port is not None:
hostname = f"{hostname}:{parsed.port}"
return urlunsplit((parsed.scheme, hostname, parsed.path, "", ""))

View File

@@ -0,0 +1,280 @@
"""Deterministic sinks for ops inventory probe evidence."""
from __future__ import annotations
import os
from typing import Any
import httpx
from activity_core.context_resolvers.ops_inventory import _sanitize_url
_DEFAULT_STATE_HUB_URL = "http://127.0.0.1:8000"
_INTER_HUB_SINK_TYPES = {
"inter-hub",
"inter-hub-event",
"inter-hub-interaction-event",
}
def persist_ops_inventory_evidence(payload: dict[str, Any]) -> list[dict[str, Any]]:
"""Persist compact non-secret ops inventory evidence for configured sources.
The workflow passes all context sources and the resolved context snapshot.
This function filters to ``type: ops-inventory`` sources and only emits
evidence when the source params contain an explicit ``evidence_sinks`` list.
"""
results: list[dict[str, Any]] = []
for source in payload.get("context_sources", []):
if not isinstance(source, dict) or source.get("type") != "ops-inventory":
continue
params = source.get("params") or {}
sinks = _normalise_sinks(params.get("evidence_sinks") or params.get("evidence_sink"))
if not sinks:
continue
bind_key = _context_bind_key(source)
probe_result = (payload.get("context") or {}).get(bind_key)
if not isinstance(probe_result, dict):
results.extend(
{
"type": sink.get("type", "unknown"),
"status": "skipped",
"reason": "missing_probe_result",
"context_key": bind_key,
}
for sink in sinks
)
continue
for sink in sinks:
sink_type = sink.get("type")
try:
if sink_type == "state-hub-progress":
results.append(
_post_state_hub_progress(payload, bind_key, probe_result, sink)
)
elif sink_type in _INTER_HUB_SINK_TYPES:
results.append(_inter_hub_result(sink))
else:
results.append({
"type": sink_type or "unknown",
"status": "skipped",
"reason": "unknown_sink_type",
"context_key": bind_key,
})
except Exception as exc:
results.append({
"type": sink_type or "unknown",
"status": "error",
"error": str(exc),
"context_key": bind_key,
})
errors = [result for result in results if result.get("status") == "error"]
if errors:
raise RuntimeError(f"ops evidence sink failure: {errors!r}")
return results
def _post_state_hub_progress(
payload: dict[str, Any],
context_key: str,
probe_result: dict[str, Any],
sink: dict[str, Any],
) -> dict[str, Any]:
base_url = sink.get("state_hub_url") or os.environ.get("STATE_HUB_URL", _DEFAULT_STATE_HUB_URL)
base_url = str(base_url).rstrip("/")
event_type = sink.get("event_type", "ops_inventory_probe")
run_id = payload["run_id"]
idempotency_key = f"{run_id}:{context_key}:{event_type}"
if _progress_exists(base_url, event_type, idempotency_key):
return {
"type": "state-hub-progress",
"status": "exists",
"event_type": event_type,
"idempotency_key": idempotency_key,
"context_key": context_key,
}
compact = _compact_probe_result(probe_result)
body: dict[str, Any] = {
"event_type": event_type,
"author": sink.get("author", "activity-core"),
"summary": _summary_text(compact.get("summary", {})),
"detail": {
"activity_id": payload.get("activity_id"),
"activity_core_run_id": run_id,
"scheduled_for": payload.get("scheduled_for"),
"source_type": "ops-inventory",
"context_key": context_key,
"idempotency_key": idempotency_key,
"probe": compact,
},
}
for key in ("topic_id", "workstream_id", "task_id", "decision_id"):
if sink.get(key):
body[key] = sink[key]
resp = httpx.post(
f"{base_url}/progress/",
json=body,
timeout=float(sink.get("timeout_seconds", 10.0)),
)
resp.raise_for_status()
data = resp.json()
return {
"type": "state-hub-progress",
"status": "posted",
"event_type": event_type,
"progress_id": data.get("id"),
"idempotency_key": idempotency_key,
"context_key": context_key,
}
def _progress_exists(base_url: str, event_type: str, idempotency_key: str) -> bool:
resp = httpx.get(
f"{base_url}/progress/",
params={"limit": 100},
timeout=10.0,
)
resp.raise_for_status()
for item in resp.json():
detail = item.get("detail") or {}
if (
item.get("event_type") == event_type
and detail.get("idempotency_key") == idempotency_key
):
return True
return False
def _inter_hub_result(sink: dict[str, Any]) -> dict[str, Any]:
missing: list[str] = []
if not (sink.get("inter_hub_url") or os.environ.get("INTER_HUB_URL")):
missing.append("INTER_HUB_URL")
if not os.environ.get("OPS_HUB_KEY"):
missing.append("OPS_HUB_KEY")
if not (sink.get("widget_mapping") or sink.get("capability_mapping")):
missing.append("widget_mapping")
if missing:
return {
"type": sink.get("type"),
"status": "skipped",
"reason": "missing_inter_hub_config",
"missing": missing,
}
return {
"type": sink.get("type"),
"status": "skipped",
"reason": "inter_hub_sink_deferred",
}
def _compact_probe_result(probe_result: dict[str, Any]) -> dict[str, Any]:
return {
"generated_at": probe_result.get("generated_at"),
"inventory_path": probe_result.get("inventory_path"),
"status": probe_result.get("status"),
"reason": probe_result.get("reason"),
"summary": _compact_summary(probe_result.get("summary")),
"services": [
_compact_service(service)
for service in probe_result.get("services", [])
if isinstance(service, dict)
],
"endpoints": [
_compact_endpoint(endpoint)
for endpoint in probe_result.get("endpoints", [])
if isinstance(endpoint, dict)
],
"access_paths": [
_compact_access_path(access_path)
for access_path in probe_result.get("access_paths", [])
if isinstance(access_path, dict)
],
}
def _compact_summary(raw: Any) -> dict[str, int]:
if not isinstance(raw, dict):
raw = {}
return {
"ok": int(raw.get("ok", 0) or 0),
"degraded": int(raw.get("degraded", 0) or 0),
"down": int(raw.get("down", 0) or 0),
"skipped": int(raw.get("skipped", 0) or 0),
}
def _compact_service(service: dict[str, Any]) -> dict[str, Any]:
return {
"service_id": service.get("service_id"),
"name": service.get("name"),
"kind": service.get("kind"),
"environment": service.get("environment"),
"lifecycle_state": service.get("lifecycle_state"),
"declared_health_status": service.get("declared_health_status"),
"owner_repos": service.get("owner_repos") if isinstance(service.get("owner_repos"), list) else [],
"endpoint_count": service.get("endpoint_count"),
"access_path_count": service.get("access_path_count"),
}
def _compact_endpoint(endpoint: dict[str, Any]) -> dict[str, Any]:
return {
"service_id": endpoint.get("service_id"),
"service_name": endpoint.get("service_name"),
"endpoint_id": endpoint.get("endpoint_id"),
"endpoint_type": endpoint.get("endpoint_type"),
"url": _sanitize_url(str(endpoint.get("url") or "")),
"expected_status": endpoint.get("expected_status"),
"expected_signal_present": bool(endpoint.get("expected_signal_present")),
"widget_ref": endpoint.get("widget_ref"),
"status": endpoint.get("status"),
"reason": endpoint.get("reason"),
"status_code": endpoint.get("status_code"),
"matched_expected_status": endpoint.get("matched_expected_status"),
"matched_expected_signal": endpoint.get("matched_expected_signal"),
}
def _compact_access_path(access_path: dict[str, Any]) -> dict[str, Any]:
return {
"service_id": access_path.get("service_id"),
"service_name": access_path.get("service_name"),
"access_path_id": access_path.get("access_path_id"),
"access_path_type": access_path.get("access_path_type"),
"declared_status": access_path.get("declared_status"),
"status": access_path.get("status"),
"reason": access_path.get("reason"),
}
def _summary_text(summary: dict[str, Any]) -> str:
return (
"Ops inventory probe: "
f"{summary.get('ok', 0)} ok, "
f"{summary.get('degraded', 0)} degraded, "
f"{summary.get('down', 0)} down, "
f"{summary.get('skipped', 0)} skipped"
)
def _context_bind_key(source: dict[str, Any]) -> str:
raw_bind = source.get("bind_to") or source.get("name") or source.get("type", "")
return raw_bind.removeprefix("context.") if raw_bind.startswith("context.") else raw_bind
def _normalise_sinks(raw: Any) -> list[dict[str, Any]]:
if raw is None:
return []
if isinstance(raw, dict):
return [raw]
if isinstance(raw, list):
return [sink for sink in raw if isinstance(sink, dict)]
return []

View File

@@ -40,6 +40,7 @@ from activity_core.activities import (
load_activity_definition,
log_run,
persist_instruction_reports,
persist_ops_evidence,
persist_task_instance,
resolve_context,
)
@@ -102,6 +103,7 @@ async def run() -> None:
evaluate_rules,
evaluate_instructions,
persist_instruction_reports,
persist_ops_evidence,
emit_tasks,
],
)

View File

@@ -26,6 +26,7 @@ with workflow.unsafe.imports_passed_through():
load_activity_definition,
log_run,
persist_instruction_reports,
persist_ops_evidence,
persist_task_instance,
resolve_context,
)
@@ -105,6 +106,26 @@ class RunActivityWorkflow:
retry_policy=_RETRY_POLICY,
)
if trigger_key == SCHEDULED_TRIGGER_KEY:
dedup_source = workflow.info().workflow_id
else:
dedup_source = f"{activity_id}:{trigger_key}"
run_id = str(uuid.uuid5(uuid.NAMESPACE_URL, dedup_source))
await workflow.execute_activity(
persist_ops_evidence,
{
"context_sources": defn.get("context_sources", []),
"context": context_snapshot,
"activity_id": activity_id,
"run_id": run_id,
"scheduled_for": scheduled_for,
"version_used": defn["version"],
},
start_to_close_timeout=_ACTIVITY_TIMEOUT,
retry_policy=_RETRY_POLICY,
)
# ── 3. Evaluate rules ─────────────────────────────────────────────────
import json as _json
event_attrs: dict = {}
@@ -140,12 +161,6 @@ class RunActivityWorkflow:
task_spec_dicts.extend(instruction_result.get("task_specs", []))
report_dicts.extend(instruction_result.get("reports", []))
if trigger_key == SCHEDULED_TRIGGER_KEY:
dedup_source = workflow.info().workflow_id
else:
dedup_source = f"{activity_id}:{trigger_key}"
run_id = str(uuid.uuid5(uuid.NAMESPACE_URL, dedup_source))
# ── 4. Persist reports and emit tasks ────────────────────────────────
if report_dicts:
await workflow.execute_activity(