generated from coulomb/repo-seed
Add S3 backend and storage verification
This commit is contained in:
@@ -19,17 +19,17 @@ source of truth (ADR-0002).
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from collections.abc import AsyncIterator, Sequence
|
||||
from collections.abc import AsyncIterator, Awaitable, Callable, Sequence
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
from typing import Any, cast
|
||||
from uuid import UUID
|
||||
|
||||
import cbor2
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncEngine
|
||||
|
||||
from artifactstore.dataplane.spi import DataPlane
|
||||
from artifactstore.dataplane.spi import DataPlane, IngestHints
|
||||
from artifactstore.db.schema import (
|
||||
artifact_files,
|
||||
artifact_packages,
|
||||
@@ -76,6 +76,7 @@ __all__ = [
|
||||
"RetentionClassRecord",
|
||||
"RetentionStateError",
|
||||
"RetentionStateRecord",
|
||||
"StorageVerificationRecord",
|
||||
]
|
||||
|
||||
|
||||
@@ -158,6 +159,18 @@ class RetentionStateRecord:
|
||||
eligible_for_deletion: bool
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class StorageVerificationRecord:
|
||||
"""Result of verifying one storage location."""
|
||||
|
||||
storage_location_id: UUID
|
||||
file_id: UUID
|
||||
backend_id: str
|
||||
content_address: str
|
||||
verified: bool
|
||||
mismatch: str | None
|
||||
|
||||
|
||||
_RETENTION_EVENT_TYPES = (
|
||||
"v1.retention.default_applied",
|
||||
"v1.retention.extended",
|
||||
@@ -176,11 +189,13 @@ class Registry:
|
||||
dataplane: DataPlane,
|
||||
view_writer: RegistryViewWriter | None = None,
|
||||
retention_policy: RetentionPolicy | None = None,
|
||||
backend_selector: Callable[[str, str], str | None] | None = None,
|
||||
) -> None:
|
||||
self._engine = engine
|
||||
self._dataplane = dataplane
|
||||
self._view_writer = view_writer or RegistryViewWriter()
|
||||
self._retention_policy = retention_policy or RetentionPolicy()
|
||||
self._backend_selector = backend_selector
|
||||
|
||||
# ---- mutating operations ------------------------------------------------
|
||||
|
||||
@@ -279,7 +294,15 @@ class Registry:
|
||||
f"relative_path {relative_path!r} already exists in package {package_id}"
|
||||
)
|
||||
|
||||
ingest = await self._dataplane.ingest_stream(stream)
|
||||
selected_backend = (
|
||||
self._backend_selector(pkg_row.producer, pkg_row.retention_class)
|
||||
if self._backend_selector is not None
|
||||
else None
|
||||
)
|
||||
ingest = await self._dataplane.ingest_stream(
|
||||
stream,
|
||||
hints=IngestHints(backend_id=selected_backend),
|
||||
)
|
||||
|
||||
file_id = uuid.uuid4()
|
||||
storage_location_id = uuid.uuid4()
|
||||
@@ -691,6 +714,73 @@ class Registry:
|
||||
for r in rows
|
||||
]
|
||||
|
||||
async def verify_storage_locations(
    self,
    *,
    backend_id: str | None = None,
    actor: str = "storage-verifier",
) -> list[StorageVerificationRecord]:
    """Re-read storage locations and emit verification events.

    Every matching ``storage_locations`` row is checked through the data
    plane's ``verify_object``. For each row one
    ``v1.storage.location_verified`` event is written and applied to the
    registry view in its own transaction, whether or not the check passed.

    Args:
        backend_id: When given, only locations on this backend are checked;
            otherwise every location is checked.
        actor: Actor recorded on the emitted events.

    Returns:
        One ``StorageVerificationRecord`` per location, in storage-location
        id order.
    """
    stmt = select(storage_locations)
    if backend_id is not None:
        stmt = stmt.where(storage_locations.c.backend_id == backend_id)
    # Read the rows up front so the read connection is released before the
    # per-row backend checks start.
    async with self._engine.connect() as conn:
        rows = (await conn.execute(stmt.order_by(storage_locations.c.id))).all()

    results: list[StorageVerificationRecord] = []
    for row in rows:
        ca = ContentAddress(row.content_address)
        # Failure defaults: these are what gets recorded if the check raises.
        verified = False
        mismatch: str | None = None
        actual_size_bytes: int | None = None
        actual_primary_hex: str | None = None
        actual_sha256_hex: str | None = None
        try:
            result = await self._dataplane.verify_object(ca, backend_id=row.backend_id)
            # Read every field before committing any of them, so an error
            # part-way through cannot leave ``verified=True`` next to an
            # error message in ``mismatch``.
            outcome = (
                result.verified,
                result.mismatch,
                result.actual_size_bytes,
                result.actual_primary_digest.hex,
                result.actual_sha256_digest.hex,
            )
        except Exception as exc:
            # Best effort: a failing check is recorded as an unverified
            # location instead of aborting the whole sweep.
            mismatch = f"{type(exc).__name__}: {exc}"
        else:
            (
                verified,
                mismatch,
                actual_size_bytes,
                actual_primary_hex,
                actual_sha256_hex,
            ) = outcome

        payload = cbor2.dumps(
            {
                "storage_location_id": str(row.id),
                "file_id": str(row.artifact_file_id),
                "backend_id": row.backend_id,
                "content_address": row.content_address,
                "verified": verified,
                "mismatch": mismatch,
                "actual_size_bytes": actual_size_bytes,
                "actual_primary_hex": actual_primary_hex,
                "actual_sha256_hex": actual_sha256_hex,
            },
            canonical=True,
        )
        event = make_event(
            event_type="v1.storage.location_verified",
            subject_kind="storage",
            subject_id=row.artifact_file_id,
            actor=actor,
            payload=payload,
        )
        # One transaction per location: the event and its view update are
        # committed together.
        async with self._engine.begin() as conn:
            written = await write(conn, event)
            await self._view_writer.apply(conn, written)
        results.append(
            StorageVerificationRecord(
                storage_location_id=row.id,
                file_id=row.artifact_file_id,
                backend_id=row.backend_id,
                content_address=row.content_address,
                verified=verified,
                mismatch=mismatch,
            )
        )
    return results
|
||||
|
||||
async def get_manifest_bytes(self, package_id: UUID, *, format: str = "cbor") -> bytes:
|
||||
"""Return the finalised manifest. ``format`` is ``cbor`` (canonical
|
||||
CBOR, the wire form) or ``json`` (the JCS projection)."""
|
||||
@@ -724,7 +814,11 @@ class Registry:
|
||||
"""Return an async byte iterator for the bytes of a stored file."""
|
||||
record = await self.get_file_metadata(file_id)
|
||||
ca = ContentAddress(record.content_address)
|
||||
return await self._dataplane.serve_object(ca, byte_range=byte_range)
|
||||
return await self._dataplane.serve_object(
|
||||
ca,
|
||||
byte_range=byte_range,
|
||||
backend_id=record.backend_id,
|
||||
)
|
||||
|
||||
async def fetch_events(
|
||||
self,
|
||||
@@ -769,6 +863,24 @@ class Registry:
|
||||
"""Probe the configured storage backend through the data plane."""
|
||||
return await self._dataplane.backend_health()
|
||||
|
||||
async def backend_health_all(self) -> list[BackendStatus]:
    """Return the health status of every backend the data plane can probe.

    A data plane that exposes a ``backend_health_all`` attribute is asked
    directly. Otherwise the single-backend ``backend_health`` probe is run
    and its result is returned as a one-element list.
    """
    multi_probe = getattr(self._dataplane, "backend_health_all", None)
    if multi_probe is not None:
        # ``getattr`` yields an untyped attribute; the cast only informs the
        # type checker and has no runtime effect on the value.
        probe = cast(Callable[[], Awaitable[list[BackendStatus]]], multi_probe)
        return await probe()
    single_status = await self.backend_health()
    return [single_status]
|
||||
|
||||
async def failed_storage_locations_count(self) -> int:
    """Count storage locations currently marked failed.

    The count is computed by the database with ``SELECT count(*)``, so no
    row ids are transferred or materialised just to be counted.

    Returns:
        Number of ``storage_locations`` rows whose ``status`` is
        ``"failed"``.
    """
    # Function-local import: this method is the only user of ``func``.
    from sqlalchemy import func

    stmt = (
        select(func.count())
        .select_from(storage_locations)
        .where(storage_locations.c.status == "failed")
    )
    async with self._engine.connect() as conn:
        result = await conn.execute(stmt)
        # An aggregate without GROUP BY always yields exactly one row.
        return int(result.scalar_one())
|
||||
|
||||
async def dispose(self) -> None:
    """Dispose of the engine, releasing its connection pool. Idempotent."""
    engine = self._engine
    await engine.dispose()
|
||||
|
||||
Reference in New Issue
Block a user