Implement SAND-WP-0008: host telemetry and self-canary

Add profile.sandbox-canary, HostSnapshot/inventory/stale schemas, SSH
collectors, before/after provision and destroy deltas, telemetry export to
State Hub and local JSON, default `sandboxer create` self-deploy,
inspect/reap-stale CLI, runbook, and CoulombCore verification (26 tests pass).
This commit is contained in:
2026-06-23 19:53:51 +02:00
parent 582c1dd3c6
commit c0a9261cdc
22 changed files with 1047 additions and 26 deletions

View File

@@ -13,6 +13,12 @@ from sandboxer.models import (
)
from sandboxer.placement import resolve_host
from sandboxer.profiles.loader import load_profile
from sandboxer.telemetry.export import export_telemetry
from sandboxer.telemetry.introspection import (
build_introspection_report,
collect_host_snapshot,
profile_wants_telemetry,
)
class SandboxManager:
@@ -24,6 +30,8 @@ class SandboxManager:
extension = load_extension(profile.extension)
backend = resolve_backend(extension)
resolved_host = resolve_host(profile, override=host)
wants_telemetry = profile_wants_telemetry(profile)
base_dir = extension.config.get("base_dir", "/tmp/sandboxer")
now = utcnow()
status = SandboxStatus(
@@ -43,6 +51,10 @@ class SandboxManager:
status.updated_at = utcnow()
emit_lifecycle_event(status, event_type=event_type_for_state(status.state))
provision_before = None
if wants_telemetry:
provision_before = collect_host_snapshot(resolved_host)
try:
handle = backend.provision(profile, request.inputs, resolved_host)
status.sandbox_id = handle["sandbox_id"]
@@ -54,6 +66,21 @@ class SandboxManager:
status.state = SandboxState.READY
status.ready_at = utcnow()
status.updated_at = status.ready_at
if wants_telemetry and provision_before:
provision_after = collect_host_snapshot(resolved_host)
report = build_introspection_report(
host=resolved_host,
sandbox_id=status.sandbox_id,
profile=profile,
provision_before=provision_before,
provision_after=provision_after,
store=self.store,
base_dir=base_dir,
)
status.telemetry = report.model_dump(mode="json")
export_telemetry(report)
self.store.save(status)
emit_lifecycle_event(status, event_type=event_type_for_state(status.state))
return status
@@ -86,6 +113,12 @@ class SandboxManager:
profile = load_profile(status.profile_id)
extension = load_extension(profile.extension)
backend = resolve_backend(extension)
wants_telemetry = profile_wants_telemetry(profile)
base_dir = extension.config.get("base_dir", "/tmp/sandboxer")
destroy_before = None
if wants_telemetry and status.host:
destroy_before = collect_host_snapshot(status.host)
status.state = SandboxState.DESTROYING
status.updated_at = utcnow()
@@ -106,6 +139,21 @@ class SandboxManager:
status.state = SandboxState.DESTROYED
status.destroyed_at = utcnow()
status.updated_at = status.destroyed_at
if wants_telemetry and destroy_before and status.host:
destroy_after = collect_host_snapshot(status.host)
report = build_introspection_report(
host=status.host,
sandbox_id=status.sandbox_id,
profile=profile,
destroy_before=destroy_before,
destroy_after=destroy_after,
store=self.store,
base_dir=base_dir,
)
status.telemetry = report.model_dump(mode="json")
export_telemetry(report)
self.store.save(status)
emit_lifecycle_event(status, event_type=event_type_for_state(status.state))
return status