generated from coulomb/repo-seed
feat: snapshot/restore checkpoints (SAND-WP-0007)
Add workspace checkpoint API with SnapshotStore, extension hooks on compose-ssh and saas-stub, manager orchestration, CLI/HTTP surface, profile.compose-checkpoint, and docs/tests.
This commit is contained in:
@@ -6,17 +6,20 @@ from sandboxer.extensions.registry import load_extension, resolve_backend
|
||||
from sandboxer.lifecycle.state_hub import emit_lifecycle_event, event_type_for_state
|
||||
from sandboxer.lifecycle.store import SandboxStore, utcnow
|
||||
from sandboxer.models import (
|
||||
Consumer,
|
||||
MeterRecord,
|
||||
Reachability,
|
||||
SandboxCreateRequest,
|
||||
SandboxState,
|
||||
SandboxStatus,
|
||||
SnapshotRecord,
|
||||
)
|
||||
from sandboxer.payments.credits import CreditsStore
|
||||
from sandboxer.payments.metering import estimate_cost, settle_usage
|
||||
from sandboxer.placement import resolve_host
|
||||
from sandboxer.profiles.loader import load_profile
|
||||
from sandboxer.routing.resolver import resolve_extension
|
||||
from sandboxer.snapshots.store import SnapshotStore
|
||||
from sandboxer.telemetry.export import export_telemetry
|
||||
from sandboxer.telemetry.introspection import (
|
||||
build_introspection_report,
|
||||
@@ -30,9 +33,27 @@ class SandboxManager:
|
||||
self,
|
||||
store: SandboxStore | None = None,
|
||||
credits: CreditsStore | None = None,
|
||||
snapshots: SnapshotStore | None = None,
|
||||
) -> None:
|
||||
self.store = store or SandboxStore()
|
||||
self.credits = credits or CreditsStore()
|
||||
self.snapshots = snapshots or SnapshotStore()
|
||||
|
||||
@staticmethod
def _handle_from_status(status: SandboxStatus) -> dict[str, str]:
    """Build the flat handle dict handed to backend calls for ``status``.

    ``sandbox_id`` and ``host`` come from the status itself, ``remote_dir``
    and ``compose_project`` come from ``status.reachability`` (empty strings
    when it is unset), and the remaining connection fields are copied from
    ``status.inputs`` with ``""`` standing in for missing keys.
    """
    reach = status.reachability
    handle = {
        "sandbox_id": status.sandbox_id,
        "host": status.host or "",
        "remote_dir": reach.remote_dir if reach else "",
        "compose_project": reach.compose_project if reach else "",
    }
    # Connection details are carried in the inputs; copy them key by key,
    # keeping the same key order as the handle has always had.
    input_keys = (
        "compose_file",
        "ssh_user",
        "compose_cmd",
        "ssh_port",
        "vm_target",
        "vm_host",
        "endpoint",
    )
    for key in input_keys:
        handle[key] = status.inputs.get(key, "")
    return handle
|
||||
|
||||
def _resolved_host(self, profile, extension, host_override: str | None) -> str:
|
||||
if extension.capabilities.pricing_model == "metered":
|
||||
@@ -157,19 +178,7 @@ class SandboxManager:
|
||||
self.store.save(status)
|
||||
emit_lifecycle_event(status, event_type=event_type_for_state(status.state))
|
||||
|
||||
handle = {
|
||||
"sandbox_id": status.sandbox_id,
|
||||
"host": status.host or "",
|
||||
"remote_dir": status.reachability.remote_dir if status.reachability else "",
|
||||
"compose_project": status.reachability.compose_project if status.reachability else "",
|
||||
"compose_file": status.inputs.get("compose_file", ""),
|
||||
"ssh_user": status.inputs.get("ssh_user", ""),
|
||||
"compose_cmd": status.inputs.get("compose_cmd", ""),
|
||||
"ssh_port": status.inputs.get("ssh_port", ""),
|
||||
"vm_target": status.inputs.get("vm_target", ""),
|
||||
"vm_host": status.inputs.get("vm_host", ""),
|
||||
"endpoint": status.inputs.get("endpoint", ""),
|
||||
}
|
||||
handle = self._handle_from_status(status)
|
||||
backend.teardown(handle)
|
||||
|
||||
status.state = SandboxState.DESTROYED
|
||||
@@ -218,4 +227,140 @@ class SandboxManager:
|
||||
)
|
||||
if existing.state != SandboxState.DESTROYED:
|
||||
self.destroy(sandbox_id)
|
||||
return self.create(request, host=existing.host)
|
||||
return self.create(request, host=existing.host)
|
||||
|
||||
def snapshot(self, sandbox_id: str, *, name: str | None = None) -> SnapshotRecord:
    """Capture a snapshot of a ready sandbox and persist its record.

    Args:
        sandbox_id: Identifier of the sandbox to snapshot.
        name: Optional label stored on the resulting record.

    Returns:
        The ``SnapshotRecord`` that was saved to ``self.snapshots``.

    Raises:
        KeyError: No sandbox with ``sandbox_id`` is in the store (or the
            backend metadata lacks ``snapshot_id``).
        RuntimeError: The sandbox is not in the READY state, or its
            extension backend reports no snapshot support.
    """
    status = self.store.get(sandbox_id)
    if not status:
        raise KeyError(f"Sandbox not found: {sandbox_id}")
    if status.state != SandboxState.READY:
        raise RuntimeError(
            f"Sandbox must be ready to snapshot, got {status.state.value}"
        )

    extension = load_extension(status.extension_id)
    backend = resolve_backend(extension)
    if not backend.supports_snapshots():
        raise RuntimeError(f"Extension {extension.id} does not support snapshots")

    handle = self._handle_from_status(status)
    meta = backend.snapshot(handle)

    # The size is optional metadata. Normalise through str() so an int or
    # None value cannot raise AttributeError after the backend has already
    # taken the snapshot, strip stray whitespace, and use isdecimal() so
    # only text that int() is guaranteed to parse is converted.
    size_text = str(meta.get("size_bytes", "")).strip()
    size_bytes = int(size_text) if size_text.isdecimal() else None

    record = SnapshotRecord(
        snapshot_id=meta["snapshot_id"],
        sandbox_id=sandbox_id,
        profile_id=status.profile_id,
        extension_id=status.extension_id,
        # Prefer the host recorded on the sandbox; fall back to the one the
        # backend reported in its metadata.
        host=status.host or meta.get("host", ""),
        artifact_path=meta.get("artifact_path", ""),
        handle=handle,
        inputs=dict(status.inputs),  # copy so later input edits don't leak in
        consumer=status.consumer,
        name=name,
        size_bytes=size_bytes,
        created_at=utcnow(),
    )
    self.snapshots.save(record)
    emit_lifecycle_event(
        status,
        summary=f"Snapshot {record.snapshot_id} created from sandbox {sandbox_id}",
        event_type="milestone",
    )
    return record
|
||||
|
||||
def get_snapshot(self, snapshot_id: str) -> SnapshotRecord | None:
    """Look up ``snapshot_id`` in the snapshot store and return the result."""
    found = self.snapshots.get(snapshot_id)
    return found
|
||||
|
||||
def list_snapshots(self, *, sandbox_id: str | None = None) -> list[SnapshotRecord]:
    """Return stored snapshots, newest first.

    When ``sandbox_id`` is truthy only snapshots whose ``sandbox_id``
    matches are included; otherwise every stored snapshot is returned.
    Ordering is by ``created_at`` descending.
    """

    def _created(rec):
        return rec.created_at

    selected = [
        rec
        for rec in self.snapshots.list_all()
        if not sandbox_id or rec.sandbox_id == sandbox_id
    ]
    selected.sort(key=_created, reverse=True)
    return selected
|
||||
|
||||
def restore(
    self,
    snapshot_id: str,
    *,
    host: str | None = None,
    consumer: Consumer | None = None,
) -> SandboxStatus:
    """Create a new sandbox from a stored snapshot.

    Args:
        snapshot_id: Identifier of the snapshot to restore from.
        host: Optional host override. When omitted, the host stored on the
            snapshot is used, and failing that ``resolve_host(profile)``.
        consumer: Optional consumer override. When omitted, the consumer
            stored on the snapshot is used.

    Returns:
        The saved ``SandboxStatus`` of the restored sandbox, in READY state.

    Raises:
        KeyError: No snapshot with ``snapshot_id`` is in the snapshot store.
        RuntimeError: The extension backend reports no snapshot support.
        ValueError: Neither the caller nor the snapshot supplies a consumer.
        Exception: Any error raised while restoring or waiting for
            readiness is re-raised after the status is marked FAILED.
    """
    record = self.snapshots.get(snapshot_id)
    if not record:
        raise KeyError(f"Snapshot not found: {snapshot_id}")

    profile = load_profile(record.profile_id)
    extension = load_extension(record.extension_id)
    backend = resolve_backend(extension)
    if not backend.supports_snapshots():
        raise RuntimeError(f"Extension {extension.id} does not support restore")

    resolved_host = host or record.host
    if not resolved_host:
        resolved_host = resolve_host(profile)
    use_consumer = consumer or record.consumer
    if not use_consumer:
        raise ValueError("consumer required for restore (not stored on snapshot)")

    now = utcnow()
    # The real sandbox id is only known once the backend has restored the
    # snapshot, so the status starts with a placeholder id.
    status = SandboxStatus(
        sandbox_id="pending",
        profile_id=record.profile_id,
        extension_id=record.extension_id,
        state=SandboxState.REQUESTED,
        consumer=use_consumer,
        host=resolved_host,
        inputs=dict(record.inputs),
        created_at=now,
        updated_at=now,
    )
    emit_lifecycle_event(status, event_type=event_type_for_state(status.state))

    status.state = SandboxState.PROVISIONING
    status.updated_at = utcnow()
    emit_lifecycle_event(status, event_type=event_type_for_state(status.state))

    # Spread the stored handle first so the explicit snapshot fields win.
    # The handle's "host" is "" when the source sandbox had no host, while
    # record.host may hold the backend-reported host; spreading the handle
    # last would silently blank that out.
    snapshot_meta = {
        **record.handle,
        "snapshot_id": record.snapshot_id,
        "artifact_path": record.artifact_path,
        "host": record.host,
    }
    try:
        handle = backend.restore_from_snapshot(
            profile, snapshot_meta, record.inputs, resolved_host
        )
        status.sandbox_id = handle["sandbox_id"]
        # Mirror the backend's connection details into the inputs so a
        # handle can later be rebuilt from the status alone.
        for key in (
            "compose_file",
            "ssh_user",
            "compose_cmd",
            "ssh_port",
            "vm_target",
            "vm_host",
            "endpoint",
        ):
            status.inputs[key] = handle.get(key, "")
        status.inputs["restored_from"] = record.snapshot_id
        reach = backend.wait_ready(handle)
        status.reachability = Reachability(**reach)
        status.state = SandboxState.READY
        status.ready_at = utcnow()
        status.updated_at = status.ready_at
        self.store.save(status)
        emit_lifecycle_event(
            status,
            summary=f"Sandbox restored from snapshot {snapshot_id}",
            event_type=event_type_for_state(status.state),
        )
        return status
    except Exception as exc:
        status.state = SandboxState.FAILED
        status.error = str(exc)
        status.updated_at = utcnow()
        # Persist the failure only once the backend has assigned a real id;
        # a status still carrying the placeholder id is not stored.
        if status.sandbox_id != "pending":
            self.store.save(status)
        emit_lifecycle_event(
            status,
            summary=f"Snapshot restore failed: {exc}",
            event_type=event_type_for_state(status.state),
        )
        raise
|
||||
Reference in New Issue
Block a user