From 952cebf2e93be5d0bded7dd67168178bbdb76c9a Mon Sep 17 00:00:00 2001 From: tegwick Date: Wed, 24 Jun 2026 07:57:40 +0200 Subject: [PATCH] feat: snapshot/restore checkpoints (SAND-WP-0007) Add workspace checkpoint API with SnapshotStore, extension hooks on compose-ssh and saas-stub, manager orchestration, CLI/HTTP surface, profile.compose-checkpoint, and docs/tests. --- SCOPE.md | 9 +- docs/extension-sdk.md | 10 +- docs/meta-framework.md | 7 +- docs/migration-gaps.md | 2 +- docs/snapshots.md | 47 ++++++ extensions/ext.compose-ssh.yaml | 2 +- profiles/profile.compose-checkpoint.yaml | 31 ++++ src/sandboxer/api/app.py | 49 +++++- src/sandboxer/cli.py | 54 +++++++ src/sandboxer/core/manager.py | 173 +++++++++++++++++++-- src/sandboxer/extensions/base.py | 20 ++- src/sandboxer/extensions/compose_ssh.py | 84 ++++++++++ src/sandboxer/extensions/saas_stub.py | 27 ++++ src/sandboxer/models.py | 22 ++- src/sandboxer/snapshots/__init__.py | 5 + src/sandboxer/snapshots/store.py | 47 ++++++ tests/test_api.py | 46 +++++- tests/test_compose_ssh.py | 87 ++++++++++- tests/test_extension_base.py | 21 ++- tests/test_snapshots.py | 172 ++++++++++++++++++++ workplans/SAND-WP-0007-snapshot-restore.md | 85 ++++++++++ 21 files changed, 966 insertions(+), 34 deletions(-) create mode 100644 docs/snapshots.md create mode 100644 profiles/profile.compose-checkpoint.yaml create mode 100644 src/sandboxer/snapshots/__init__.py create mode 100644 src/sandboxer/snapshots/store.py create mode 100644 tests/test_snapshots.py create mode 100644 workplans/SAND-WP-0007-snapshot-restore.md diff --git a/SCOPE.md b/SCOPE.md index 5b03fdb..5f4ba82 100644 --- a/SCOPE.md +++ b/SCOPE.md @@ -1,7 +1,7 @@ --- domain: infotech repo: sand-boxer -updated: "2026-06-23" +updated: "2026-06-24" --- # SCOPE @@ -42,8 +42,9 @@ Lineage: provision/teardown extracted from `the-custodian/e2e-framework/`; ## In Scope - **Unified establishment API** — CLI v0 + HTTP stub (`create`, `get`, `list`, - `destroy`, `recreate`); fuller surface (`extend_ttl`, `snapshot`) planned -- **Profile catalog** — `profile.compose-e2e`, `profile.sandbox-canary`; more + `destroy`, `recreate`, `snapshot`, `restore`); `extend_ttl` planned +- **Profile catalog** — `profile.compose-e2e`, `profile.compose-checkpoint`, + `profile.sandbox-canary`; more profiles and extensions over time - **Extension platform** — `ext.compose-ssh` (SSH + compose); plugin contract in `docs/meta-framework.md` @@ -146,7 +147,7 @@ cd ~/the-custodian && make e2e REPO=activity-core - TTL auto-expiry / `extend_ttl` enforcement - ~~`ext.vm-packer` attach mode~~ — done (SAND-WP-0005); Packer build orchestration deferred - Real E2B / Modal adapters (stub + payments v0 done in SAND-WP-0006) -- Snapshot / restore / checkpoint profiles (SAND-WP-0007) +- ~~Snapshot / restore / checkpoint profiles~~ — done (SAND-WP-0007) - Formal ops-bridge tunnel attachment in reachability descriptor - Dedicated sandboxer01 host (CoulombCore interim only today) - `reuse-surface validate` / federation publish workflow diff --git a/docs/extension-sdk.md b/docs/extension-sdk.md index 2a4a90f..21c0d51 100644 --- a/docs/extension-sdk.md +++ b/docs/extension-sdk.md @@ -12,7 +12,10 @@ wait_ready(handle) → reachability dict teardown(handle) → cleanup report dict ``` -Optional (SaaS, deferred): `estimate_cost(profile, duration) → MeterQuote` +Optional (SaaS): `estimate_cost(profile, duration) → MeterQuote` + +Optional (checkpoints): `supports_snapshots()`, `snapshot(handle)`, +`restore_from_snapshot(profile, snapshot_meta, inputs, host)` ### Base class @@ -29,8 +32,9 @@ Reference implementations: | Extension | Module | Mode | |-----------|--------|------| -| `ext.compose-ssh` | `compose_ssh.py` | Remote compose stack | +| `ext.compose-ssh` | `compose_ssh.py` | Remote compose stack + tar snapshots | | `ext.vm-packer` | `vm_packer.py` | Attach workspace on pre-built VM | +| `ext.saas-stub` | `saas_stub.py` | Metered stub + metadata snapshots | ## Registration @@ -104,4 +108,4 @@ Implement `estimate_cost` and `meter_actual` on `SandboxExtension`. Register wit | Packer build orchestration from `create` | Future WP | | E2B / Modal / Daytona cloud adapters | Post SAND-WP-0006 | | fin-hub billing export | Future | -| Snapshot / restore hooks | SAND-WP-0007 | \ No newline at end of file +| Cross-host snapshot transfer | Future | \ No newline at end of file diff --git a/docs/meta-framework.md b/docs/meta-framework.md index a323c67..301a824 100644 --- a/docs/meta-framework.md +++ b/docs/meta-framework.md @@ -16,7 +16,7 @@ agent harnessing, validation, and code generation. | **Extension** | Backend adapter implementing provision / wait_ready / teardown | | **Host** | Registered placement target for self-hosted extensions; read-only telemetry via `profile.sandbox-canary` (see `docs/host-telemetry.md`) | | **Sandbox** | Running instance of a profile | -| **Snapshot** | Point-in-time workspace checkpoint (deferred — SAND-WP-0003) | +| **Snapshot** | Point-in-time workspace checkpoint (`sandboxer snapshot` / `restore`) | | **Route** | Extension selection policy when multiple backends qualify | | **Meter** | Usage record for payments layer (SaaS extensions — SAND-WP-0006) | @@ -85,7 +85,7 @@ Extends the `build-agent` self-register pattern: generic sandbox identities carr | `extend_ttl` | Extend time-to-live | Stub | | `recreate` | Destroy and reprovision from stored seed | **Yes** | | `destroy` | Idempotent teardown | **Yes** | -| `snapshot` / `restore` | Checkpoint workspace | Deferred (SAND-WP-0003) | +| `snapshot` / `restore` | Checkpoint workspace | **Yes** (compose-ssh, saas-stub) | | `exec` | Run command in sandbox | Harness-owned via SSH (glas-harness) | HTTP surface (optional v0; CLI calls core library directly): @@ -94,6 +94,9 @@ HTTP surface (optional v0; CLI calls core library directly): - `GET /v1/sandboxes/{id}` — get - `GET /v1/sandboxes` — list - `DELETE /v1/sandboxes/{id}` — destroy +- `POST /v1/sandboxes/{id}/snapshot` — checkpoint +- `POST /v1/snapshots/{id}/restore` — restore +- `GET /v1/snapshots` — list checkpoints --- diff --git a/docs/migration-gaps.md b/docs/migration-gaps.md index ea4be12..0ce68a6 100644 --- a/docs/migration-gaps.md +++ b/docs/migration-gaps.md @@ -45,5 +45,5 @@ Deferred: Packer orchestration from API, `make remote-build` shim. |------|----------| | ~~SaaS extensions + payments v0~~ | SAND-WP-0006 — stub + routing + credits | | E2B / Modal real adapters | Post SAND-WP-0006 | -| Snapshot / restore | SAND-WP-0007 | +| ~~Snapshot / restore~~ | SAND-WP-0007 — `docs/snapshots.md` | | TTL enforcement + scheduled reap | TBD | \ No newline at end of file diff --git a/docs/snapshots.md b/docs/snapshots.md new file mode 100644 index 0000000..3c15117 --- /dev/null +++ b/docs/snapshots.md @@ -0,0 +1,47 @@ +# Workspace snapshots + +Point-in-time workspace checkpoints — SAND-WP-0007. + +## Overview + +Snapshots capture the remote workspace state of a **ready** sandbox without +destroying it. Restore provisions a **new** sandbox from the checkpoint. + +| Operation | CLI | HTTP | +|-----------|-----|------| +| Create checkpoint | `sandboxer snapshot ` | `POST /v1/sandboxes/{id}/snapshot` | +| Restore | `sandboxer restore ` | `POST /v1/snapshots/{id}/restore` | +| List | `sandboxer snapshots list` | `GET /v1/snapshots` | +| Get | `sandboxer snapshots get ` | `GET /v1/snapshots/{id}` | + +Snapshot metadata is stored at `~/.local/share/sandboxer/snapshots.json`. +Extension artifacts (e.g. tarballs) live on the placement host. + +## Profile + +`profile.compose-checkpoint` binds `ext.compose-ssh` for checkpoint-enabled +compose sandboxes. Use the same `inputs.repo` convention as `profile.compose-e2e`. + +## ext.compose-ssh behavior + +1. **Snapshot** — `tar czf` of `remote_dir` to `{base_dir}/snapshots/{id}.tar.gz` +2. **Restore** — new `sandbox_id`, extract tarball, `compose up -d` + +Cross-host restore is not supported in v0 (artifact must be on the target host). + +## ext.saas-stub + +Metadata-only checkpoints for routing and payments tests. Restore reprovisions +a fresh stub endpoint. + +## Extension contract + +Optional hooks on `SandboxExtension`: + +```python +def supports_snapshots(self) -> bool: ... +def snapshot(self, handle) -> dict[str, str]: ... +def restore_from_snapshot(self, profile, snapshot_meta, inputs, host) -> dict[str, str]: ... +``` + +See `docs/extension-sdk.md`. \ No newline at end of file diff --git a/extensions/ext.compose-ssh.yaml b/extensions/ext.compose-ssh.yaml index f581821..d714123 100644 --- a/extensions/ext.compose-ssh.yaml +++ b/extensions/ext.compose-ssh.yaml @@ -8,7 +8,7 @@ handler: sandboxer.extensions.compose_ssh:ComposeSSHExtension capabilities: isolation_levels: [container] regions: [] - persistence: false + persistence: true pricing_model: self-hosted config: base_dir: /tmp/sandboxer diff --git a/profiles/profile.compose-checkpoint.yaml b/profiles/profile.compose-checkpoint.yaml new file mode 100644 index 0000000..84d41f4 --- /dev/null +++ b/profiles/profile.compose-checkpoint.yaml @@ -0,0 +1,31 @@ +id: profile.compose-checkpoint +version: "1.0.0" +extension: ext.compose-ssh +isolation: + level: container +network: + default: deny + egress: [] +workspace: + mode: remote-canonical + access: rw +scope_default: session +ttl: + default: 4h + max: 24h + idle_reap: null +resources: + cpu: null + memory_mb: null +setup: + instructions: "Use sandboxer snapshot/restore for workspace checkpoints." + secret_refs: [] +placement: + prefer: [sandboxer01] + fallback: [coulombcore] +reachability: + tunnel: ops-bridge + identity: ops-warden +metadata: + cost_class: self-hosted + latency_class: standard \ No newline at end of file diff --git a/src/sandboxer/api/app.py b/src/sandboxer/api/app.py index f17ee24..8138425 100644 --- a/src/sandboxer/api/app.py +++ b/src/sandboxer/api/app.py @@ -5,7 +5,12 @@ from __future__ import annotations from fastapi import FastAPI, HTTPException from sandboxer.core.manager import SandboxManager -from sandboxer.models import SandboxCreateRequest, SandboxStatus +from sandboxer.models import ( + SandboxCreateRequest, + SandboxStatus, + SnapshotRecord, + SnapshotRestoreRequest, +) app = FastAPI(title="sand-boxer", version="0.0.0") _manager = SandboxManager() @@ -37,4 +42,44 @@ def destroy_sandbox(sandbox_id: str) -> SandboxStatus: try: return _manager.destroy(sandbox_id) except KeyError as exc: - raise HTTPException(status_code=404, detail=str(exc)) from exc \ No newline at end of file + raise HTTPException(status_code=404, detail=str(exc)) from exc + + +@app.post("/v1/sandboxes/{sandbox_id}/snapshot", response_model=SnapshotRecord) +def snapshot_sandbox( + sandbox_id: str, + name: str | None = None, +) -> SnapshotRecord: + try: + return _manager.snapshot(sandbox_id, name=name) + except KeyError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + except RuntimeError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + + +@app.post("/v1/snapshots/{snapshot_id}/restore", response_model=SandboxStatus) +def restore_snapshot( + snapshot_id: str, + request: SnapshotRestoreRequest | None = None, +) -> SandboxStatus: + req = request or SnapshotRestoreRequest() + try: + return _manager.restore(snapshot_id, host=req.host, consumer=req.consumer) + except KeyError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + except (ValueError, Exception) as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + + +@app.get("/v1/snapshots", response_model=list[SnapshotRecord]) +def list_snapshots(sandbox_id: str | None = None) -> list[SnapshotRecord]: + return _manager.list_snapshots(sandbox_id=sandbox_id) + + +@app.get("/v1/snapshots/{snapshot_id}", response_model=SnapshotRecord) +def get_snapshot(snapshot_id: str) -> SnapshotRecord: + record = _manager.get_snapshot(snapshot_id) + if not record: + raise HTTPException(status_code=404, detail="snapshot not found") + return record \ No newline at end of file diff --git a/src/sandboxer/cli.py b/src/sandboxer/cli.py index f4031b8..925e2b7 100644 --- a/src/sandboxer/cli.py +++ b/src/sandboxer/cli.py @@ -28,6 +28,8 @@ inspect_app = typer.Typer(help="Host introspection without provisioning.") app.add_typer(inspect_app, name="inspect") credits_app = typer.Typer(help="SaaS sandbox credits (metered extensions).") app.add_typer(credits_app, name="credits") +snapshots_app = typer.Typer(help="Workspace checkpoint snapshots.") +app.add_typer(snapshots_app, name="snapshots") @app.callback() @@ -142,6 +144,58 @@ def sandbox_destroy(sandbox_id: str) -> None: _print_telemetry_summary(status.telemetry) +@app.command("snapshot") +def sandbox_snapshot( + sandbox_id: str, + name: Annotated[str | None, typer.Option(help="Optional snapshot label")] = None, +) -> None: + """Create a workspace checkpoint from a ready sandbox.""" + manager = SandboxManager() + try: + record = manager.snapshot(sandbox_id, name=name) + except (KeyError, RuntimeError) as exc: + typer.echo(f"Error: {exc}", err=True) + raise typer.Exit(code=1) from exc + _print_json(record.model_dump(mode="json")) + + +@app.command("restore") +def sandbox_restore( + snapshot_id: str, + host: Annotated[str | None, typer.Option(help="Override placement host")] = None, + actor: Annotated[str, typer.Option(help="Consumer actor type")] = "adm", + project: Annotated[str, typer.Option(help="Calling project id")] = "sand-boxer", +) -> None: + """Provision a new sandbox from a snapshot checkpoint.""" + manager = SandboxManager() + consumer = Consumer(actor=ActorType(actor), project=project) + try: + status = manager.restore(snapshot_id, host=host, consumer=consumer) + except (KeyError, ValueError, Exception) as exc: + typer.echo(f"Error: {exc}", err=True) + raise typer.Exit(code=1) from exc + _print_json(status.model_dump(mode="json")) + + +@snapshots_app.command("list") +def snapshots_list( + sandbox_id: Annotated[str | None, typer.Option(help="Filter by source sandbox")] = None, +) -> None: + """List stored snapshot checkpoints.""" + items = SandboxManager().list_snapshots(sandbox_id=sandbox_id) + _print_json([s.model_dump(mode="json") for s in items]) + + +@snapshots_app.command("get") +def snapshots_get(snapshot_id: str) -> None: + """Get snapshot metadata by id.""" + record = SandboxManager().get_snapshot(snapshot_id) + if not record: + typer.echo(f"Snapshot not found: {snapshot_id}", err=True) + raise typer.Exit(code=1) + _print_json(record.model_dump(mode="json")) + + @app.command("recreate") def sandbox_recreate(sandbox_id: str) -> None: """Destroy and reprovision from stored inputs.""" diff --git a/src/sandboxer/core/manager.py b/src/sandboxer/core/manager.py index 502e88f..87b2933 100644 --- a/src/sandboxer/core/manager.py +++ b/src/sandboxer/core/manager.py @@ -6,17 +6,20 @@ from sandboxer.extensions.registry import load_extension, resolve_backend from sandboxer.lifecycle.state_hub import emit_lifecycle_event, event_type_for_state from sandboxer.lifecycle.store import SandboxStore, utcnow from sandboxer.models import ( + Consumer, MeterRecord, Reachability, SandboxCreateRequest, SandboxState, SandboxStatus, + SnapshotRecord, ) from sandboxer.payments.credits import CreditsStore from sandboxer.payments.metering import estimate_cost, settle_usage from sandboxer.placement import resolve_host from sandboxer.profiles.loader import load_profile from sandboxer.routing.resolver import resolve_extension +from sandboxer.snapshots.store import SnapshotStore from sandboxer.telemetry.export import export_telemetry from sandboxer.telemetry.introspection import ( build_introspection_report, @@ -30,9 +33,27 @@ class SandboxManager: self, store: SandboxStore | None = None, credits: CreditsStore | None = None, + snapshots: SnapshotStore | None = None, ) -> None: self.store = store or SandboxStore() self.credits = credits or CreditsStore() + self.snapshots = snapshots or SnapshotStore() + + @staticmethod + def _handle_from_status(status: SandboxStatus) -> dict[str, str]: + return { + "sandbox_id": status.sandbox_id, + "host": status.host or "", + "remote_dir": status.reachability.remote_dir if status.reachability else "", + "compose_project": status.reachability.compose_project if status.reachability else "", + "compose_file": status.inputs.get("compose_file", ""), + "ssh_user": status.inputs.get("ssh_user", ""), + "compose_cmd": status.inputs.get("compose_cmd", ""), + "ssh_port": status.inputs.get("ssh_port", ""), + "vm_target": status.inputs.get("vm_target", ""), + "vm_host": status.inputs.get("vm_host", ""), + "endpoint": status.inputs.get("endpoint", ""), + } def _resolved_host(self, profile, extension, host_override: str | None) -> str: if extension.capabilities.pricing_model == "metered": @@ -157,19 +178,7 @@ class SandboxManager: self.store.save(status) emit_lifecycle_event(status, event_type=event_type_for_state(status.state)) - handle = { - "sandbox_id": status.sandbox_id, - "host": status.host or "", - "remote_dir": status.reachability.remote_dir if status.reachability else "", - "compose_project": status.reachability.compose_project if status.reachability else "", - "compose_file": status.inputs.get("compose_file", ""), - "ssh_user": status.inputs.get("ssh_user", ""), - "compose_cmd": status.inputs.get("compose_cmd", ""), - "ssh_port": status.inputs.get("ssh_port", ""), - "vm_target": status.inputs.get("vm_target", ""), - "vm_host": status.inputs.get("vm_host", ""), - "endpoint": status.inputs.get("endpoint", ""), - } + handle = self._handle_from_status(status) backend.teardown(handle) status.state = SandboxState.DESTROYED @@ -218,4 +227,140 @@ class SandboxManager: ) if existing.state != SandboxState.DESTROYED: self.destroy(sandbox_id) - return self.create(request, host=existing.host) \ No newline at end of file + return self.create(request, host=existing.host) + + def snapshot(self, sandbox_id: str, *, name: str | None = None) -> SnapshotRecord: + status = self.store.get(sandbox_id) + if not status: + raise KeyError(f"Sandbox not found: {sandbox_id}") + if status.state != SandboxState.READY: + raise RuntimeError( + f"Sandbox must be ready to snapshot, got {status.state.value}" + ) + + extension = load_extension(status.extension_id) + backend = resolve_backend(extension) + if not backend.supports_snapshots(): + raise RuntimeError(f"Extension {extension.id} does not support snapshots") + + handle = self._handle_from_status(status) + meta = backend.snapshot(handle) + size_raw = meta.get("size_bytes", "") + size_bytes = int(size_raw) if size_raw.isdigit() else None + + record = SnapshotRecord( + snapshot_id=meta["snapshot_id"], + sandbox_id=sandbox_id, + profile_id=status.profile_id, + extension_id=status.extension_id, + host=status.host or meta.get("host", ""), + artifact_path=meta.get("artifact_path", ""), + handle=handle, + inputs=dict(status.inputs), + consumer=status.consumer, + name=name, + size_bytes=size_bytes, + created_at=utcnow(), + ) + self.snapshots.save(record) + emit_lifecycle_event( + status, + summary=f"Snapshot {record.snapshot_id} created from sandbox {sandbox_id}", + event_type="milestone", + ) + return record + + def get_snapshot(self, snapshot_id: str) -> SnapshotRecord | None: + return self.snapshots.get(snapshot_id) + + def list_snapshots(self, *, sandbox_id: str | None = None) -> list[SnapshotRecord]: + items = self.snapshots.list_all() + if sandbox_id: + items = [s for s in items if s.sandbox_id == sandbox_id] + return sorted(items, key=lambda s: s.created_at, reverse=True) + + def restore( + self, + snapshot_id: str, + *, + host: str | None = None, + consumer: Consumer | None = None, + ) -> SandboxStatus: + record = self.snapshots.get(snapshot_id) + if not record: + raise KeyError(f"Snapshot not found: {snapshot_id}") + + profile = load_profile(record.profile_id) + extension = load_extension(record.extension_id) + backend = resolve_backend(extension) + if not backend.supports_snapshots(): + raise RuntimeError(f"Extension {extension.id} does not support restore") + + resolved_host = host or record.host + if not resolved_host: + resolved_host = resolve_host(profile) + use_consumer = consumer or record.consumer + if not use_consumer: + raise ValueError("consumer required for restore (not stored on snapshot)") + + now = utcnow() + status = SandboxStatus( + sandbox_id="pending", + profile_id=record.profile_id, + extension_id=record.extension_id, + state=SandboxState.REQUESTED, + consumer=use_consumer, + host=resolved_host, + inputs=dict(record.inputs), + created_at=now, + updated_at=now, + ) + emit_lifecycle_event(status, event_type=event_type_for_state(status.state)) + + status.state = SandboxState.PROVISIONING + status.updated_at = utcnow() + emit_lifecycle_event(status, event_type=event_type_for_state(status.state)) + + snapshot_meta = { + "snapshot_id": record.snapshot_id, + "artifact_path": record.artifact_path, + "host": record.host, + **record.handle, + } + try: + handle = backend.restore_from_snapshot( + profile, snapshot_meta, record.inputs, resolved_host + ) + status.sandbox_id = handle["sandbox_id"] + status.inputs["compose_file"] = handle.get("compose_file", "") + status.inputs["ssh_user"] = handle.get("ssh_user", "") + status.inputs["compose_cmd"] = handle.get("compose_cmd", "") + status.inputs["ssh_port"] = handle.get("ssh_port", "") + status.inputs["vm_target"] = handle.get("vm_target", "") + status.inputs["vm_host"] = handle.get("vm_host", "") + status.inputs["endpoint"] = handle.get("endpoint", "") + status.inputs["restored_from"] = record.snapshot_id + reach = backend.wait_ready(handle) + status.reachability = Reachability(**reach) + status.state = SandboxState.READY + status.ready_at = utcnow() + status.updated_at = status.ready_at + self.store.save(status) + emit_lifecycle_event( + status, + summary=f"Sandbox restored from snapshot {snapshot_id}", + event_type=event_type_for_state(status.state), + ) + return status + except Exception as exc: + status.state = SandboxState.FAILED + status.error = str(exc) + status.updated_at = utcnow() + if status.sandbox_id != "pending": + self.store.save(status) + emit_lifecycle_event( + status, + summary=f"Snapshot restore failed: {exc}", + event_type=event_type_for_state(status.state), + ) + raise \ No newline at end of file diff --git a/src/sandboxer/extensions/base.py b/src/sandboxer/extensions/base.py index 39ba1e5..19ac9ad 100644 --- a/src/sandboxer/extensions/base.py +++ b/src/sandboxer/extensions/base.py @@ -45,4 +45,22 @@ class SandboxExtension(ABC): def meter_actual(self, handle: dict[str, str], *, duration_s: float) -> float | None: """Optional post-destroy actual cost in USD.""" - return None \ No newline at end of file + return None + + def supports_snapshots(self) -> bool: + """Whether this extension implements checkpoint snapshot/restore.""" + return False + + def snapshot(self, handle: dict[str, str]) -> dict[str, str]: + """Capture workspace checkpoint. Returns snapshot metadata including snapshot_id.""" + raise NotImplementedError(f"{type(self).__name__} does not support snapshots") + + def restore_from_snapshot( + self, + profile: Profile, + snapshot_meta: dict[str, str], + inputs: dict[str, str], + host: str, + ) -> dict[str, str]: + """Provision a new sandbox from a prior checkpoint.""" + raise NotImplementedError(f"{type(self).__name__} does not support restore") \ No newline at end of file diff --git a/src/sandboxer/extensions/compose_ssh.py b/src/sandboxer/extensions/compose_ssh.py index 5116049..6429812 100644 --- a/src/sandboxer/extensions/compose_ssh.py +++ b/src/sandboxer/extensions/compose_ssh.py @@ -3,6 +3,7 @@ from __future__ import annotations import os +import uuid from pathlib import Path from typing import Any @@ -35,6 +36,89 @@ class ComposeSSHExtension(SandboxExtension): def _is_podman_compose(self) -> bool: return self._compose_bin().startswith("podman-compose") + def supports_snapshots(self) -> bool: + return True + + def _ssh_for_handle(self, handle: dict[str, str]) -> SSHConfig: + ssh_user = handle.get("ssh_user") or self.ssh_user or None + return SSHConfig.from_env(handle["host"], user=ssh_user) + + def snapshot(self, handle: dict[str, str]) -> dict[str, str]: + remote_dir = handle["remote_dir"] + snapshot_id = str(uuid.uuid4())[:12] + snapshot_dir = f"{self.base_dir}/snapshots" + artifact = f"{snapshot_dir}/{snapshot_id}.tar.gz" + ssh = self._ssh_for_handle(handle) + + rc, out = ssh.run(f"mkdir -p {snapshot_dir}") + if rc != 0: + raise RuntimeError(f"Failed to create snapshot dir: {out}") + + rc, out = ssh.run(f"tar czf {artifact} -C {remote_dir} .", timeout=300) + if rc != 0: + raise RuntimeError(f"snapshot tar failed: {out}") + + rc, out = ssh.run(f"stat -c %s {artifact} 2>/dev/null || stat -f %z {artifact}") + size_bytes = int(out.strip()) if rc == 0 and out.strip().isdigit() else None + + return { + "snapshot_id": snapshot_id, + "artifact_path": artifact, + "host": handle["host"], + "remote_dir": remote_dir, + "compose_file": handle.get("compose_file", ""), + "compose_project": handle.get("compose_project", ""), + "ssh_user": handle.get("ssh_user", ""), + "compose_cmd": handle.get("compose_cmd") or self._compose_bin(), + "size_bytes": str(size_bytes) if size_bytes is not None else "", + } + + def restore_from_snapshot( + self, + profile: Profile, + snapshot_meta: dict[str, str], + inputs: dict[str, str], + host: str, + ) -> dict[str, str]: + artifact_host = snapshot_meta.get("host") or host + if artifact_host != host: + raise NotImplementedError("cross-host restore is not supported in v0") + + sandbox_id = self.new_sandbox_id(inputs) + remote_dir = f"{self.base_dir}/{sandbox_id}" + artifact = snapshot_meta["artifact_path"] + compose_file = snapshot_meta.get("compose_file") or inputs.get("compose_file", "") + if not compose_file: + raise ValueError("snapshot missing compose_file") + + ssh_user = snapshot_meta.get("ssh_user") or self.ssh_user or None + ssh = SSHConfig.from_env(host, user=ssh_user) + + rc, out = ssh.run(f"mkdir -p {remote_dir}") + if rc != 0: + raise RuntimeError(f"Failed to create remote dir: {out}") + + rc, out = ssh.run(f"tar xzf {artifact} -C {remote_dir}", timeout=300) + if rc != 0: + raise RuntimeError(f"snapshot extract failed: {out}") + + project_name = f"sbx-{profile.id.split('.')[-1]}-{sandbox_id}" + compose_cmd = snapshot_meta.get("compose_cmd") or self._compose_bin() + up_cmd = self._compose_invocation(remote_dir, project_name, compose_file, "up -d") + rc, out = ssh.run(up_cmd, timeout=self.compose_timeout_s) + if rc != 0: + raise RuntimeError(f"compose up after restore failed: {out}") + + return { + "sandbox_id": sandbox_id, + "host": host, + "remote_dir": remote_dir, + "compose_project": project_name, + "compose_file": compose_file, + "ssh_user": ssh.user or "", + "compose_cmd": compose_cmd, + } + def provision( self, profile: Profile, inputs: dict[str, str], host: str ) -> dict[str, str]: diff --git a/src/sandboxer/extensions/saas_stub.py b/src/sandboxer/extensions/saas_stub.py index 6bfd959..8f84d99 100644 --- a/src/sandboxer/extensions/saas_stub.py +++ b/src/sandboxer/extensions/saas_stub.py @@ -6,6 +6,7 @@ fallback without E2B/Modal credentials. from __future__ import annotations +import uuid from typing import Any from sandboxer.extensions.base import SandboxExtension @@ -41,6 +42,32 @@ class SaaSStubExtension(SandboxExtension): hours = max(duration_s / 3600.0, 1 / 3600) return round(self.session_fee_usd + hours * self.rate_usd_per_hour, 4) + def supports_snapshots(self) -> bool: + return True + + def snapshot(self, handle: dict[str, str]) -> dict[str, str]: + snapshot_id = str(uuid.uuid4())[:12] + return { + "snapshot_id": snapshot_id, + "artifact_path": "", + "host": handle.get("host", self.provider), + "endpoint": handle.get("endpoint", ""), + "sandbox_id": handle.get("sandbox_id", ""), + "stub": "true", + } + + def restore_from_snapshot( + self, + profile: Profile, + snapshot_meta: dict[str, str], + inputs: dict[str, str], + host: str, + ) -> dict[str, str]: + merged = dict(inputs) + if snapshot_meta.get("endpoint"): + merged.setdefault("restore_from", snapshot_meta["endpoint"]) + return self.provision(profile, merged, host) + def provision( self, profile: Profile, inputs: dict[str, str], host: str ) -> dict[str, str]: diff --git a/src/sandboxer/models.py b/src/sandboxer/models.py index 20f75d8..27e1660 100644 --- a/src/sandboxer/models.py +++ b/src/sandboxer/models.py @@ -170,4 +170,24 @@ class SandboxStatus(BaseModel): created_at: datetime updated_at: datetime ready_at: datetime | None = None - destroyed_at: datetime | None = None \ No newline at end of file + destroyed_at: datetime | None = None + + +class SnapshotRestoreRequest(BaseModel): + host: str | None = None + consumer: Consumer | None = None + + +class SnapshotRecord(BaseModel): + snapshot_id: str + sandbox_id: str + profile_id: str + extension_id: str + host: str + artifact_path: str = "" + handle: dict[str, str] = Field(default_factory=dict) + inputs: dict[str, str] = Field(default_factory=dict) + consumer: Consumer | None = None + name: str | None = None + size_bytes: int | None = None + created_at: datetime \ No newline at end of file diff --git a/src/sandboxer/snapshots/__init__.py b/src/sandboxer/snapshots/__init__.py new file mode 100644 index 0000000..75930d7 --- /dev/null +++ b/src/sandboxer/snapshots/__init__.py @@ -0,0 +1,5 @@ +"""Snapshot checkpoint persistence.""" + +from sandboxer.snapshots.store import SnapshotStore + +__all__ = ["SnapshotStore"] \ No newline at end of file diff --git a/src/sandboxer/snapshots/store.py b/src/sandboxer/snapshots/store.py new file mode 100644 index 0000000..b237623 --- /dev/null +++ b/src/sandboxer/snapshots/store.py @@ -0,0 +1,47 @@ +"""Persistent snapshot index (JSON file).""" + +from __future__ import annotations + +import json +import os +from pathlib import Path + +from sandboxer.models import SnapshotRecord + + +def _default_store_path() -> Path: + base = Path(os.environ.get("XDG_DATA_HOME", Path.home() / ".local" / "share")) + return base / "sandboxer" / "snapshots.json" + + +class SnapshotStore: + def __init__(self, path: Path | None = None) -> None: + self.path = path or _default_store_path() + self.path.parent.mkdir(parents=True, exist_ok=True) + + def _read(self) -> dict[str, dict]: + if not self.path.exists(): + return {} + return json.loads(self.path.read_text()) + + def _write(self, data: dict[str, dict]) -> None: + self.path.write_text(json.dumps(data, indent=2, default=str)) + + def save(self, record: SnapshotRecord) -> None: + data = self._read() + data[record.snapshot_id] = record.model_dump(mode="json") + self._write(data) + + def get(self, snapshot_id: str) -> SnapshotRecord | None: + raw = self._read().get(snapshot_id) + if not raw: + return None + return SnapshotRecord.model_validate(raw) + + def list_all(self) -> list[SnapshotRecord]: + return [SnapshotRecord.model_validate(v) for v in self._read().values()] + + def delete(self, snapshot_id: str) -> None: + data = self._read() + data.pop(snapshot_id, None) + self._write(data) \ No newline at end of file diff --git a/tests/test_api.py b/tests/test_api.py index 54b5c61..b118538 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -5,7 +5,7 @@ from unittest.mock import patch from fastapi.testclient import TestClient from sandboxer.api.app import app -from sandboxer.models import ActorType, Consumer, SandboxState, SandboxStatus +from sandboxer.models import ActorType, Consumer, SandboxState, SandboxStatus, SnapshotRecord def test_list_sandboxes_empty() -> None: @@ -46,4 +46,46 @@ def test_create_sandbox() -> None: }, ) assert resp.status_code == 200 - assert resp.json()["sandbox_id"] == "abc12345" \ No newline at end of file + assert resp.json()["sandbox_id"] == "abc12345" + + +def test_snapshot_sandbox() -> None: + from datetime import UTC, datetime + + record = SnapshotRecord( + snapshot_id="snap12345678", + sandbox_id="abc12345", + profile_id="profile.compose-checkpoint", + extension_id="ext.compose-ssh", + host="coulombcore", + created_at=datetime.now(UTC), + ) + with patch("sandboxer.api.app._manager") as mgr: + mgr.snapshot.return_value = record + client = TestClient(app) + resp = client.post("/v1/sandboxes/abc12345/snapshot") + assert resp.status_code == 200 + assert resp.json()["snapshot_id"] == "snap12345678" + + +def test_restore_snapshot() -> None: + from datetime import UTC, datetime + + status = SandboxStatus( + sandbox_id="restored1", + profile_id="profile.compose-checkpoint", + extension_id="ext.compose-ssh", + state=SandboxState.READY, + consumer=Consumer(actor=ActorType.ADM, project="sand-boxer"), + created_at=datetime.now(UTC), + updated_at=datetime.now(UTC), + ) + with patch("sandboxer.api.app._manager") as mgr: + mgr.restore.return_value = status + client = TestClient(app) + resp = client.post( + "/v1/snapshots/snap12345678/restore", + json={"consumer": {"actor": "adm", "project": "sand-boxer"}}, + ) + assert resp.status_code == 200 + assert resp.json()["sandbox_id"] == "restored1" \ No newline at end of file diff --git a/tests/test_compose_ssh.py b/tests/test_compose_ssh.py index 1f0995d..0bf5d26 100644 --- a/tests/test_compose_ssh.py +++ b/tests/test_compose_ssh.py @@ -1,6 +1,21 @@ -"""Compose command configuration.""" +"""Compose command configuration and snapshot hooks.""" + +from unittest.mock import patch + +import pytest from sandboxer.extensions.compose_ssh import ComposeSSHExtension +from sandboxer.models import Profile + + +def _profile() -> Profile: + return Profile.model_validate( + { + "id": "profile.compose-checkpoint", + "version": "1.0.0", + "extension": "ext.compose-ssh", + } + ) def test_compose_cmd_from_config() -> None: @@ -11,4 +26,72 @@ def test_compose_cmd_from_config() -> None: def test_compose_cmd_env_override(monkeypatch) -> None: monkeypatch.setenv("SANDBOXER_COMPOSE_CMD", "nerdctl compose") ext = ComposeSSHExtension({"compose_cmd": "docker compose"}) - assert ext._compose_bin() == "nerdctl compose" \ No newline at end of file + assert ext._compose_bin() == "nerdctl compose" + + +def test_supports_snapshots() -> None: + ext = ComposeSSHExtension() + assert ext.supports_snapshots() is True + + +def test_snapshot_creates_remote_tarball() -> None: + ext = ComposeSSHExtension({"base_dir": "/tmp/sandboxer"}) + handle = { + "sandbox_id": "abc12345", + "host": "coulombcore", + "remote_dir": "/tmp/sandboxer/abc12345", + "compose_file": "docker-compose.yml", + "compose_project": "sbx-e2e-abc12345", + "ssh_user": "root", + } + + def fake_run(cmd, *, timeout=60): + if "tar czf" in cmd: + return 0, "" + if "stat" in cmd: + return 0, "2048" + return 0, "" + + with patch.object(ext, "_ssh_for_handle") as ssh_factory: + ssh = ssh_factory.return_value + ssh.run.side_effect = fake_run + meta = ext.snapshot(handle) + + assert meta["artifact_path"].endswith(".tar.gz") + assert meta["snapshot_id"] + assert meta["size_bytes"] == "2048" + + +def test_restore_from_snapshot_extracts_and_compose_up() -> None: + ext = ComposeSSHExtension({"base_dir": "/tmp/sandboxer"}) + snapshot_meta = { + "snapshot_id": "snap12345678", + "artifact_path": "/tmp/sandboxer/snapshots/snap12345678.tar.gz", + "host": "coulombcore", + "compose_file": "docker-compose.yml", + "ssh_user": "root", + } + + with patch("sandboxer.extensions.compose_ssh.SSHConfig.from_env") as ssh_factory: + ssh = ssh_factory.return_value + ssh.run.return_value = (0, "") + ssh.user = "root" + handle = ext.restore_from_snapshot(_profile(), snapshot_meta, {}, "coulombcore") + + assert handle["sandbox_id"] + assert handle["remote_dir"].endswith(handle["sandbox_id"]) + calls = [c.args[0] for c in ssh.run.call_args_list] + assert any("tar xzf" in c for c in calls) + assert any("up -d" in c for c in calls) + + +def test_restore_cross_host_not_supported() -> None: + ext = ComposeSSHExtension() + snapshot_meta = { + "snapshot_id": "snap1", + "artifact_path": "/tmp/snap.tar.gz", + "host": "host-a", + "compose_file": "docker-compose.yml", + } + with pytest.raises(NotImplementedError, match="cross-host"): + ext.restore_from_snapshot(_profile(), snapshot_meta, {}, "host-b") \ No newline at end of file diff --git a/tests/test_extension_base.py b/tests/test_extension_base.py index 8ff72b5..38578f9 100644 --- a/tests/test_extension_base.py +++ b/tests/test_extension_base.py @@ -1,5 +1,7 @@ """Extension SDK base class tests.""" +import pytest + from sandboxer.extensions.base import SandboxExtension from sandboxer.extensions.compose_ssh import ComposeSSHExtension from sandboxer.extensions.vm_packer import VMPackerExtension @@ -13,4 +15,21 @@ def test_reference_extensions_subclass_base() -> None: def test_new_sandbox_id_from_inputs() -> None: assert SandboxExtension.new_sandbox_id({"sandbox_id": "fixed123"}) == "fixed123" generated = SandboxExtension.new_sandbox_id({}) - assert len(generated) == 8 \ No newline at end of file + assert len(generated) == 8 + + +def test_default_snapshot_not_supported() -> None: + class MinimalExtension(SandboxExtension): + def provision(self, profile, inputs, host): + return {} + + def wait_ready(self, handle): + return {} + + def teardown(self, handle): + return {} + + ext = MinimalExtension() + assert ext.supports_snapshots() is False + with pytest.raises(NotImplementedError): + ext.snapshot({}) \ No newline at end of file diff --git a/tests/test_snapshots.py b/tests/test_snapshots.py new file mode 100644 index 0000000..82c0db3 --- /dev/null +++ b/tests/test_snapshots.py @@ -0,0 +1,172 @@ +"""Snapshot store and manager checkpoint tests.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from pathlib import Path +from unittest.mock import patch + +import pytest + +from sandboxer.core.manager import SandboxManager +from sandboxer.lifecycle.store import SandboxStore +from sandboxer.models import ( + ActorType, + Consumer, + Reachability, + SandboxCreateRequest, + SandboxState, + SandboxStatus, + SnapshotRecord, +) +from sandboxer.snapshots.store import SnapshotStore + + +class SnapshotBackend: + def supports_snapshots(self) -> bool: + return True + + def provision(self, profile, inputs, host): + return { + "sandbox_id": "test1234", + "host": host, + "remote_dir": "/tmp/sandboxer/test1234", + "compose_project": "sbx-e2e-test1234", + "compose_file": "docker-compose.yml", + "ssh_user": "root", + } + + def wait_ready(self, handle): + return { + "ssh": f"root@{handle['host']}", + "remote_dir": handle["remote_dir"], + "compose_project": handle["compose_project"], + "host": handle["host"], + } + + def teardown(self, handle): + return {"compose_removed": "True", "remote_dir_removed": "True"} + + def snapshot(self, handle): + return { + "snapshot_id": "snap12345678", + "artifact_path": "/tmp/sandboxer/snapshots/snap12345678.tar.gz", + "host": handle["host"], + "size_bytes": "4096", + } + + def restore_from_snapshot(self, profile, snapshot_meta, inputs, host): + return { + "sandbox_id": "restored1", + "host": host, + "remote_dir": "/tmp/sandboxer/restored1", + "compose_project": "sbx-e2e-restored1", + "compose_file": "docker-compose.yml", + "ssh_user": "root", + } + + +@pytest.fixture +def store(tmp_path: Path) -> SandboxStore: + return SandboxStore(path=tmp_path / "sandboxes.json") + + +@pytest.fixture +def snapshots(tmp_path: Path) -> SnapshotStore: + return SnapshotStore(path=tmp_path / "snapshots.json") + + +def _ready_status(sandbox_id: str = "test1234") -> SandboxStatus: + now = datetime.now(UTC) + return SandboxStatus( + sandbox_id=sandbox_id, + profile_id="profile.compose-checkpoint", + extension_id="ext.compose-ssh", + state=SandboxState.READY, + consumer=Consumer(actor=ActorType.ADM, project="sand-boxer"), + host="coulombcore", + reachability=Reachability( + ssh="root@coulombcore", + remote_dir="/tmp/sandboxer/test1234", + compose_project="sbx-e2e-test1234", + host="coulombcore", + ), + inputs={ + "repo": "/tmp/repo", + "compose_file": "docker-compose.yml", + "ssh_user": "root", + }, + created_at=now, + updated_at=now, + ready_at=now, + ) + + +def test_snapshot_store_roundtrip(snapshots: SnapshotStore) -> None: + now = datetime.now(UTC) + record = SnapshotRecord( + snapshot_id="snap12345678", + sandbox_id="test1234", + profile_id="profile.compose-checkpoint", + extension_id="ext.compose-ssh", + host="coulombcore", + artifact_path="/tmp/snap.tar.gz", + created_at=now, + ) + snapshots.save(record) + loaded = snapshots.get("snap12345678") + assert loaded is not None + assert loaded.sandbox_id == "test1234" + + +def test_manager_snapshot_and_restore(store: SandboxStore, snapshots: SnapshotStore) -> None: + store.save(_ready_status()) + manager = SandboxManager(store=store, snapshots=snapshots) + backend = SnapshotBackend() + + with ( + patch("sandboxer.core.manager.resolve_backend", return_value=backend), + patch("sandboxer.core.manager.load_extension"), + patch("sandboxer.core.manager.emit_lifecycle_event", return_value=None), + patch("sandboxer.core.manager.load_profile"), + patch("sandboxer.core.manager.resolve_host", return_value="coulombcore"), + ): + record = manager.snapshot("test1234", name="pre-test") + assert record.snapshot_id == "snap12345678" + assert record.name == "pre-test" + assert record.size_bytes == 4096 + + status = manager.restore("snap12345678") + assert status.state == SandboxState.READY + assert status.sandbox_id == "restored1" + assert status.inputs.get("restored_from") == "snap12345678" + + +def test_snapshot_requires_ready(store: SandboxStore, snapshots: SnapshotStore) -> None: + status = _ready_status() + status.state = SandboxState.PROVISIONING + store.save(status) + manager = SandboxManager(store=store, snapshots=snapshots) + + with pytest.raises(RuntimeError, match="ready"): + manager.snapshot("test1234") + + +def test_create_snapshot_restore_flow(store: SandboxStore, snapshots: SnapshotStore) -> None: + manager = SandboxManager(store=store, snapshots=snapshots) + backend = SnapshotBackend() + request = SandboxCreateRequest( + profile="profile.compose-checkpoint", + inputs={"repo": "/tmp/repo"}, + consumer=Consumer(actor=ActorType.ADM, project="sand-boxer"), + ) + + with ( + patch("sandboxer.core.manager.resolve_backend", return_value=backend), + patch("sandboxer.core.manager.emit_lifecycle_event", return_value=None), + patch("sandboxer.core.manager.resolve_host", return_value="coulombcore"), + ): + created = manager.create(request) + record = manager.snapshot(created.sandbox_id) + restored = manager.restore(record.snapshot_id) + assert restored.sandbox_id == "restored1" \ No newline at end of file diff --git a/workplans/SAND-WP-0007-snapshot-restore.md b/workplans/SAND-WP-0007-snapshot-restore.md new file mode 100644 index 0000000..db537de --- /dev/null +++ b/workplans/SAND-WP-0007-snapshot-restore.md @@ -0,0 +1,85 @@ +--- +id: SAND-WP-0007 +type: workplan +title: "Snapshot restore and checkpoint profiles" +domain: infotech +repo: sand-boxer +status: finished +owner: codex +topic_slug: custodian +created: "2026-06-24" +updated: "2026-06-24" +--- + +# Snapshot restore and checkpoint profiles + +Workspace checkpoint API for self-hosted compose sandboxes and SaaS stub. + +**Predecessor:** SAND-WP-0006 (SaaS extensions — finished) +**Follow-on:** TTL enforcement, cross-host snapshot transfer, E2B/Modal persistence + +## Snapshot store and models + +```task +id: SAND-WP-0007-T01 +status: done +priority: high +``` + +`SnapshotRecord`, `SnapshotStore` at `~/.local/share/sandboxer/snapshots.json`. + +## Extension hooks + +```task +id: SAND-WP-0007-T02 +status: done +priority: high +``` + +Optional `supports_snapshots`, `snapshot`, `restore_from_snapshot` on +`SandboxExtension`. Reference: `ext.compose-ssh` (remote tar), `ext.saas-stub` +(metadata stub). + +## Manager orchestration + +```task +id: SAND-WP-0007-T03 +status: done +priority: high +``` + +`SandboxManager.snapshot`, `restore`, `list_snapshots`, `get_snapshot`. Restore +provisions a new sandbox; source sandbox stays ready. + +## CLI and HTTP API + +```task +id: SAND-WP-0007-T04 +status: done +priority: high +``` + +CLI: `snapshot`, `restore`, `snapshots list|get`. HTTP: +`POST /v1/sandboxes/{id}/snapshot`, `POST /v1/snapshots/{id}/restore`, +`GET /v1/snapshots`. + +## Profile and docs + +```task +id: SAND-WP-0007-T05 +status: done +priority: medium +``` + +`profile.compose-checkpoint`, `docs/snapshots.md`, updates to `extension-sdk.md`, +`meta-framework.md`, `SCOPE.md`. + +## Tests + +```task +id: SAND-WP-0007-T06 +status: done +priority: high +``` + +`tests/test_snapshots.py`, compose-ssh snapshot tests, API stubs, manager flow. \ No newline at end of file