Implement ops inventory probe evidence slice

This commit is contained in:
2026-06-05 23:16:40 +02:00
parent ee1f805c0b
commit 41d3e75a88
17 changed files with 1521 additions and 14 deletions

View File

@@ -1 +1 @@
from activity_core.context_resolvers import repo_scoping, state_hub # noqa: F401
from activity_core.context_resolvers import ops_inventory, repo_scoping, state_hub # noqa: F401

View File

@@ -0,0 +1,322 @@
"""Ops service inventory probe context adapter.
Registered as source type ``ops-inventory``.
The resolver reads the Custodian's non-secret service inventory and performs
bounded HTTP/HTTPS checks for declared endpoints. It deliberately records only
compact probe metadata: stable inventory ids, sanitized endpoint URLs, status
codes, boolean match results, and summary counts.
"""
from __future__ import annotations
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from urllib.parse import urlsplit, urlunsplit
import httpx
import yaml
from activity_core.context_resolvers.base import CONTEXT_RESOLVER_REGISTRY, ContextResolver
# Inventory file used when neither the ``inventory_path`` param nor the
# ``OPS_INVENTORY_PATH`` environment variable supplies a path.
_DEFAULT_INVENTORY_PATH = "/home/worsch/the-custodian/ops/service-inventory.yml"
# Timeout (seconds) handed to each HTTP probe when ``timeout_seconds`` is not given.
_DEFAULT_TIMEOUT_SECONDS = 10.0
# Endpoint types that are actually probed; this is also the default
# ``include_kinds`` set. Endpoints of any other type are reported as skipped.
_SUPPORTED_ENDPOINT_TYPES = {"http", "https"}
class OpsInventoryContextResolver(ContextResolver):
    """Context resolver that probes services listed in a non-secret YAML inventory."""

    def resolve(self, query: str, event: Any, params: dict[str, Any]) -> dict[str, Any]:
        """Answer the ``probe_services`` query; every other query yields ``{}``.

        ``event`` is accepted for interface compatibility but is not consulted.
        """
        if query == "probe_services":
            return _probe_services(params)
        return {}
# Import-time side effect: register the resolver class (not an instance) under
# the ``ops-inventory`` source type.
CONTEXT_RESOLVER_REGISTRY["ops-inventory"] = OpsInventoryContextResolver
def _probe_services(params: dict[str, Any]) -> dict[str, Any]:
    """Probe every service declared in the ops inventory and collect the results.

    Recognised ``params`` keys:

    * ``inventory_path`` - inventory file; falls back to the
      ``OPS_INVENTORY_PATH`` environment variable, then to the module default.
    * ``timeout_seconds`` - per-request timeout; missing or ``None`` means the
      module default.
    * ``allow_network`` - when false, supported endpoints are reported as
      skipped instead of being requested (default true).
    * ``required`` - when true, a missing or invalid inventory raises instead
      of producing a "skipped" result (default false).
    * ``include_kinds`` - endpoint types to consider (string or list).

    Returns:
        A result mapping with ``services``, ``endpoints``, ``access_paths``
        and ``summary`` entries. When the inventory is missing or invalid and
        ``required`` is false, the lists are empty and ``reason``/``status``
        describe why nothing was probed.

    Raises:
        FileNotFoundError: ``required`` is true and the inventory is missing.
        ValueError: ``required`` is true and the inventory root is not a
            mapping or has no ``services`` list.
        yaml.YAMLError: ``required`` is true and the file is not valid YAML.
    """
    inventory_path = Path(
        str(
            params.get("inventory_path")
            or os.environ.get("OPS_INVENTORY_PATH")
            or _DEFAULT_INVENTORY_PATH
        )
    )
    # An explicit ``None`` means "not configured"; float(None) would raise.
    raw_timeout = params.get("timeout_seconds")
    timeout_seconds = float(
        _DEFAULT_TIMEOUT_SECONDS if raw_timeout is None else raw_timeout
    )
    allow_network = _bool_param(params.get("allow_network", True))
    required = _bool_param(params.get("required", False))
    include_kinds = _include_kinds(params.get("include_kinds"))

    if not inventory_path.exists():
        if required:
            raise FileNotFoundError(f"ops inventory not found: {inventory_path}")
        return _empty_result(
            inventory_path,
            reason="inventory_not_found",
            status="skipped",
            skipped=1,
        )

    try:
        inventory = _load_inventory(inventory_path)
    except (ValueError, yaml.YAMLError):
        # A malformed file is the same class of problem as a missing
        # ``services`` list, so an optional inventory gets the same
        # "invalid_inventory" outcome instead of aborting the resolver.
        if required:
            raise
        return _empty_result(
            inventory_path,
            reason="invalid_inventory",
            status="skipped",
            skipped=1,
        )

    raw_services = inventory.get("services")
    if not isinstance(raw_services, list):
        if required:
            raise ValueError("ops inventory missing services list")
        return _empty_result(
            inventory_path,
            reason="invalid_inventory",
            status="skipped",
            skipped=1,
        )

    result = _empty_result(inventory_path)
    for raw_service in raw_services:
        if not isinstance(raw_service, dict):
            # Non-mapping entries are ignored rather than treated as errors.
            continue
        result["services"].append(_service_summary(raw_service))
        for endpoint in _endpoint_entries(
            raw_service,
            include_kinds,
            allow_network,
            timeout_seconds,
        ):
            result["endpoints"].append(endpoint)
            _increment_summary(result["summary"], endpoint["status"])
        for access_path in _access_path_entries(raw_service):
            result["access_paths"].append(access_path)
            _increment_summary(result["summary"], access_path["status"])
    return result
def _load_inventory(path: Path) -> dict[str, Any]:
    """Parse the inventory YAML at ``path`` and return its top-level mapping.

    An empty (or otherwise falsy) document is treated as an empty inventory.

    Raises:
        ValueError: the document root is not a mapping.
    """
    with path.open("r", encoding="utf-8") as stream:
        document = yaml.safe_load(stream)
    if not document:
        return {}
    if isinstance(document, dict):
        return document
    raise ValueError("ops inventory root must be a mapping")
def _empty_result(
inventory_path: Path,
*,
reason: str | None = None,
status: str | None = None,
skipped: int = 0,
) -> dict[str, Any]:
summary: dict[str, int] = {
"ok": 0,
"degraded": 0,
"down": 0,
"skipped": skipped,
}
result: dict[str, Any] = {
"services": [],
"endpoints": [],
"access_paths": [],
"summary": summary,
"generated_at": datetime.now(timezone.utc).isoformat(),
"inventory_path": str(inventory_path),
}
if reason is not None:
result["reason"] = reason
if status is not None:
result["status"] = status
return result
def _service_summary(service: dict[str, Any]) -> dict[str, Any]:
endpoints = service.get("endpoints") if isinstance(service.get("endpoints"), list) else []
access_paths = (
service.get("access_paths") if isinstance(service.get("access_paths"), list) else []
)
owner_repos = service.get("owner_repos")
return {
"service_id": str(service.get("id") or ""),
"name": str(service.get("name") or service.get("id") or ""),
"kind": str(service.get("kind") or ""),
"environment": str(service.get("environment") or ""),
"lifecycle_state": str(service.get("lifecycle_state") or ""),
"declared_health_status": str(service.get("health_status") or ""),
"owner_repos": owner_repos if isinstance(owner_repos, list) else [],
"endpoint_count": len(endpoints),
"access_path_count": len(access_paths),
}
def _endpoint_entries(
    service: dict[str, Any],
    include_kinds: set[str],
    allow_network: bool,
    timeout_seconds: float,
) -> list[dict[str, Any]]:
    """Produce one result record per mapping-shaped endpoint of ``service``.

    An endpoint is probed only when its type is in ``include_kinds``, is a
    supported type, has a URL, and ``allow_network`` is true. Otherwise the
    record is marked "skipped" with the first reason that applies, checked in
    that order. Endpoint entries that are not mappings are ignored.
    """
    declared = service.get("endpoints")
    if not isinstance(declared, list):
        return []

    service_id = str(service.get("id") or "")
    service_name = str(service.get("name") or service_id)

    records: list[dict[str, Any]] = []
    for candidate in declared:
        if not isinstance(candidate, dict):
            continue
        kind = str(candidate.get("type") or "").lower()
        record = _endpoint_base(service_id, service_name, candidate, kind)

        if kind not in include_kinds:
            skip_reason = "kind_not_included"
        elif kind not in _SUPPORTED_ENDPOINT_TYPES:
            skip_reason = "unsupported_endpoint_type"
        elif not candidate.get("url"):
            skip_reason = "missing_url"
        elif not allow_network:
            skip_reason = "network_disabled"
        else:
            skip_reason = None

        if skip_reason is None:
            record.update(_probe_http_endpoint(candidate, timeout_seconds))
        else:
            record.update({"status": "skipped", "reason": skip_reason})
        records.append(record)
    return records
def _endpoint_base(
    service_id: str,
    service_name: str,
    endpoint: dict[str, Any],
    endpoint_type: str,
) -> dict[str, Any]:
    """Create the pre-probe record for one endpoint.

    The record starts out as "skipped" with no reason and no probe outcome.
    The URL is stored in sanitized form. Only the presence of an expected
    signal is recorded, never its text. ``expected_status`` is kept only when
    it is an ``int``.
    """
    declared_status = endpoint.get("expected_status")
    if not isinstance(declared_status, int):
        declared_status = None
    safe_url = _sanitize_url(str(endpoint.get("url") or ""))
    has_signal = bool(endpoint.get("expected_signal"))

    record: dict[str, Any] = {
        "service_id": service_id,
        "service_name": service_name,
        "endpoint_id": str(endpoint.get("id") or ""),
        "endpoint_type": endpoint_type,
        "url": safe_url,
        "expected_status": declared_status,
        "expected_signal_present": has_signal,
        "widget_ref": str(endpoint.get("widget_ref") or ""),
        "status": "skipped",
    }
    # Probe outcome fields stay unset until a request is actually made.
    for pending in (
        "reason",
        "status_code",
        "matched_expected_status",
        "matched_expected_signal",
    ):
        record[pending] = None
    return record
def _probe_http_endpoint(
    endpoint: dict[str, Any],
    timeout_seconds: float,
) -> dict[str, Any]:
    """Issue a single GET against ``endpoint["url"]`` and classify the outcome.

    Redirects are not followed, so a 3xx response is compared against
    ``expected_status`` as-is.

    Returns:
        A mapping with ``status`` ("ok", "degraded" or "down"), ``reason``,
        ``status_code``, ``matched_expected_status`` and
        ``matched_expected_signal``. "down" means no response was obtained,
        and ``reason`` is then the exception class name. "degraded" means a
        response arrived but the expected status code or the expected signal
        substring did not match.
    """
    url = str(endpoint.get("url") or "")
    expected_status = endpoint.get("expected_status")
    expected_signal = endpoint.get("expected_signal")
    try:
        response = httpx.get(url, timeout=timeout_seconds, follow_redirects=False)
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        # httpx.InvalidURL does not derive from httpx.HTTPError. Without
        # listing it, one malformed inventory URL (e.g. a non-numeric port)
        # would abort the whole resolver instead of marking this endpoint down.
        return {
            "status": "down",
            "reason": type(exc).__name__,
            "status_code": None,
            "matched_expected_status": False if isinstance(expected_status, int) else None,
            "matched_expected_signal": False if expected_signal else None,
        }

    # An undeclared expectation counts as satisfied.
    status_match = (
        response.status_code == expected_status
        if isinstance(expected_status, int)
        else True
    )
    signal_match = (
        str(expected_signal) in response.text
        if isinstance(expected_signal, str) and expected_signal
        else True
    )

    status = "ok" if status_match and signal_match else "degraded"
    # A status mismatch takes precedence over a missing signal.
    reason = None
    if not status_match:
        reason = "expected_status_mismatch"
    elif not signal_match:
        reason = "expected_signal_missing"
    return {
        "status": status,
        "reason": reason,
        "status_code": response.status_code,
        "matched_expected_status": status_match,
        "matched_expected_signal": signal_match,
    }
def _access_path_entries(service: dict[str, Any]) -> list[dict[str, Any]]:
service_id = str(service.get("id") or "")
service_name = str(service.get("name") or service_id)
raw_paths = service.get("access_paths")
if not isinstance(raw_paths, list):
return []
entries: list[dict[str, Any]] = []
for index, raw_path in enumerate(raw_paths, start=1):
if not isinstance(raw_path, dict):
continue
path_type = str(raw_path.get("type") or "").lower()
entries.append({
"service_id": service_id,
"service_name": service_name,
"access_path_id": str(raw_path.get("id") or f"{service_id}-access-{index}"),
"access_path_type": path_type,
"declared_status": str(raw_path.get("status") or ""),
"status": "skipped",
"reason": "unsupported_access_path_type",
})
return entries
def _include_kinds(raw: Any) -> set[str]:
if raw is None:
return set(_SUPPORTED_ENDPOINT_TYPES)
if isinstance(raw, str):
return {part.strip().lower() for part in raw.split(",") if part.strip()}
if isinstance(raw, list):
return {str(part).strip().lower() for part in raw if str(part).strip()}
return set(_SUPPORTED_ENDPOINT_TYPES)
def _bool_param(raw: Any) -> bool:
if isinstance(raw, bool):
return raw
if isinstance(raw, str):
return raw.strip().lower() not in {"0", "false", "no", "off"}
return bool(raw)
def _increment_summary(summary: dict[str, int], status: str) -> None:
if status not in summary:
status = "skipped"
summary[status] += 1
def _sanitize_url(raw_url: str) -> str:
if not raw_url:
return ""
parsed = urlsplit(raw_url)
if not parsed.scheme or not parsed.netloc:
return raw_url.split("?", 1)[0].split("#", 1)[0]
hostname = parsed.hostname or ""
if parsed.port is not None:
hostname = f"{hostname}:{parsed.port}"
return urlunsplit((parsed.scheme, hostname, parsed.path, "", ""))