Add S3 backend and storage verification

This commit is contained in:
2026-05-16 23:26:03 +02:00
parent b7ceaf7682
commit 864f7f203c
18 changed files with 1085 additions and 40 deletions

View File

@@ -19,17 +19,17 @@ source of truth (ADR-0002).
from __future__ import annotations
import uuid
from collections.abc import AsyncIterator, Sequence
from collections.abc import AsyncIterator, Awaitable, Callable, Sequence
from dataclasses import dataclass
from datetime import UTC, datetime
from typing import Any
from typing import Any, cast
from uuid import UUID
import cbor2
from sqlalchemy import select, text
from sqlalchemy.ext.asyncio import AsyncEngine
from artifactstore.dataplane.spi import DataPlane
from artifactstore.dataplane.spi import DataPlane, IngestHints
from artifactstore.db.schema import (
artifact_files,
artifact_packages,
@@ -76,6 +76,7 @@ __all__ = [
"RetentionClassRecord",
"RetentionStateError",
"RetentionStateRecord",
"StorageVerificationRecord",
]
@@ -158,6 +159,18 @@ class RetentionStateRecord:
eligible_for_deletion: bool
@dataclass(frozen=True, slots=True)
class StorageVerificationRecord:
    """Result of verifying one storage location.

    Immutable value object; ``Registry.verify_storage_locations`` returns one
    instance per storage location it checked.
    """

    # Id of the storage_locations row that was checked.
    storage_location_id: UUID
    # Id of the artifact file the storage location belongs to.
    file_id: UUID
    # Backend the object was verified against.
    backend_id: str
    # Content address recorded for the location, as stored in the row.
    content_address: str
    # Outcome reported by the data plane; False when verification raised.
    verified: bool
    # Mismatch reported by the data plane, or "<ExceptionType>: <message>"
    # when verification raised; otherwise None.
    mismatch: str | None
_RETENTION_EVENT_TYPES = (
"v1.retention.default_applied",
"v1.retention.extended",
@@ -176,11 +189,13 @@ class Registry:
dataplane: DataPlane,
view_writer: RegistryViewWriter | None = None,
retention_policy: RetentionPolicy | None = None,
backend_selector: Callable[[str, str], str | None] | None = None,
) -> None:
self._engine = engine
self._dataplane = dataplane
self._view_writer = view_writer or RegistryViewWriter()
self._retention_policy = retention_policy or RetentionPolicy()
self._backend_selector = backend_selector
# ---- mutating operations ------------------------------------------------
@@ -279,7 +294,15 @@ class Registry:
f"relative_path {relative_path!r} already exists in package {package_id}"
)
ingest = await self._dataplane.ingest_stream(stream)
selected_backend = (
self._backend_selector(pkg_row.producer, pkg_row.retention_class)
if self._backend_selector is not None
else None
)
ingest = await self._dataplane.ingest_stream(
stream,
hints=IngestHints(backend_id=selected_backend),
)
file_id = uuid.uuid4()
storage_location_id = uuid.uuid4()
@@ -691,6 +714,73 @@ class Registry:
for r in rows
]
async def verify_storage_locations(
    self,
    *,
    backend_id: str | None = None,
    actor: str = "storage-verifier",
) -> list[StorageVerificationRecord]:
    """Re-read storage locations and emit verification events.

    Every selected ``storage_locations`` row is checked through the data
    plane's ``verify_object``. One ``v1.storage.location_verified`` event is
    written per row, in its own transaction, and applied to the view writer,
    whether or not the check passed.

    Args:
        backend_id: When given, only locations on this backend are checked;
            otherwise every storage location is checked.
        actor: Actor recorded on the emitted events.

    Returns:
        One record per checked location, ordered by storage location id.

    Note:
        An exception raised while verifying a location is not propagated.
        It is recorded as ``"<ExceptionType>: <message>"`` in ``mismatch``
        with ``verified`` left ``False`` so the scan can continue with the
        remaining locations. Errors while building the content address or
        writing the event do propagate.
    """
    stmt = select(storage_locations)
    if backend_id is not None:
        stmt = stmt.where(storage_locations.c.backend_id == backend_id)
    async with self._engine.connect() as conn:
        rows = (await conn.execute(stmt.order_by(storage_locations.c.id))).all()

    results: list[StorageVerificationRecord] = []
    for row in rows:
        ca = ContentAddress(row.content_address)
        verified = False
        mismatch: str | None = None
        actual_size_bytes: int | None = None
        actual_primary_hex: str | None = None
        actual_sha256_hex: str | None = None
        try:
            result = await self._dataplane.verify_object(ca, backend_id=row.backend_id)
            # Read every field before committing any of them: if one of the
            # reads raises, the record must not end up half-populated (for
            # example verified=True paired with an exception mismatch).
            outcome = (
                result.verified,
                result.mismatch,
                result.actual_size_bytes,
                result.actual_primary_digest.hex,
                result.actual_sha256_digest.hex,
            )
        except Exception as exc:
            # Deliberate best effort: a failing location is reported as
            # unverified instead of aborting the whole scan.
            mismatch = f"{type(exc).__name__}: {exc}"
        else:
            (
                verified,
                mismatch,
                actual_size_bytes,
                actual_primary_hex,
                actual_sha256_hex,
            ) = outcome

        payload = cbor2.dumps(
            {
                "storage_location_id": str(row.id),
                "file_id": str(row.artifact_file_id),
                "backend_id": row.backend_id,
                "content_address": row.content_address,
                "verified": verified,
                "mismatch": mismatch,
                "actual_size_bytes": actual_size_bytes,
                "actual_primary_hex": actual_primary_hex,
                "actual_sha256_hex": actual_sha256_hex,
            },
            canonical=True,
        )
        event = make_event(
            event_type="v1.storage.location_verified",
            subject_kind="storage",
            subject_id=row.artifact_file_id,
            actor=actor,
            payload=payload,
        )
        # One transaction per location: the event write and its view update
        # commit together, independently of the other locations.
        async with self._engine.begin() as conn:
            written = await write(conn, event)
            await self._view_writer.apply(conn, written)

        results.append(
            StorageVerificationRecord(
                storage_location_id=row.id,
                file_id=row.artifact_file_id,
                backend_id=row.backend_id,
                content_address=row.content_address,
                verified=verified,
                mismatch=mismatch,
            )
        )
    return results
async def get_manifest_bytes(self, package_id: UUID, *, format: str = "cbor") -> bytes:
"""Return the finalised manifest. ``format`` is ``cbor`` (canonical
CBOR, the wire form) or ``json`` (the JCS projection)."""
@@ -724,7 +814,11 @@ class Registry:
"""Return an async byte iterator for the bytes of a stored file."""
record = await self.get_file_metadata(file_id)
ca = ContentAddress(record.content_address)
return await self._dataplane.serve_object(ca, byte_range=byte_range)
return await self._dataplane.serve_object(
ca,
byte_range=byte_range,
backend_id=record.backend_id,
)
async def fetch_events(
self,
@@ -769,6 +863,24 @@ class Registry:
"""Probe the configured storage backend through the data plane."""
return await self._dataplane.backend_health()
async def backend_health_all(self) -> list[BackendStatus]:
    """Return the health of every storage backend the data plane reports on.

    A data plane exposing ``backend_health_all`` is asked directly. For any
    other data plane the single ``backend_health`` result is wrapped in a
    one-element list.
    """
    multi_probe = getattr(self._dataplane, "backend_health_all", None)
    if multi_probe is not None:
        # getattr() is untyped; the cast only informs the type checker.
        probe = cast(Callable[[], Awaitable[list[BackendStatus]]], multi_probe)
        return await probe()
    single_status = await self.backend_health()
    return [single_status]
async def failed_storage_locations_count(self) -> int:
    """Count storage locations currently marked failed.

    The database computes the count, so only a single integer is
    transferred regardless of how many locations have failed.

    Returns:
        Number of ``storage_locations`` rows whose ``status`` is
        ``"failed"``.
    """
    # Function-local import so this method does not depend on the module's
    # top-level sqlalchemy import list.
    from sqlalchemy import func

    stmt = (
        select(func.count())
        .select_from(storage_locations)
        .where(storage_locations.c.status == "failed")
    )
    async with self._engine.connect() as conn:
        return int((await conn.execute(stmt)).scalar_one())
async def dispose(self) -> None:
    """Dispose of the engine, releasing its connection pool. Idempotent."""
    engine = self._engine
    await engine.dispose()