Add reference-counted garbage collection

This commit is contained in:
2026-05-17 00:26:19 +02:00
parent 1dae855700
commit a60d24f814
7 changed files with 631 additions and 3 deletions

View File

@@ -186,6 +186,34 @@ def retention_sweep() -> None:
typer.echo(json.dumps({"marked_package_ids": marked, "marked_count": len(marked)}, indent=2))
@retention_app.command("gc")
def retention_gc() -> None:
"""Run one garbage-collection pass for deletion-eligible packages."""
settings = get_settings()
results = asyncio.run(_garbage_collect_async(settings))
object_keys = {
(r["backend_id"], r["content_address"])
for r in results
if r["object_delete_attempted"]
}
deleted_object_keys = {
(r["backend_id"], r["content_address"])
for r in results
if r["object_delete_attempted"] and r["object_deleted"]
}
typer.echo(
json.dumps(
{
"released_location_count": len(results),
"delete_attempted_object_count": len(object_keys),
"deleted_object_count": len(deleted_object_keys),
"results": results,
},
indent=2,
)
)
@storage_app.command("verify")
def storage_verify(
backend: str | None = typer.Option(
@@ -285,6 +313,30 @@ async def _retention_sweep_async(settings: Settings) -> list[str]:
return [str(package_id) for package_id in marked]
async def _garbage_collect_async(settings: Settings) -> list[dict[str, Any]]:
from artifactstore.app import build_registry
registry: Registry = build_registry(settings)
try:
results = await registry.collect_garbage()
finally:
await registry.dispose()
return [
{
"storage_location_id": str(result.storage_location_id),
"file_id": str(result.file_id),
"package_id": str(result.package_id),
"backend_id": result.backend_id,
"content_address": result.content_address,
"object_delete_attempted": result.object_delete_attempted,
"object_deleted": result.object_deleted,
"ref_count_before": result.ref_count_before,
"ref_count_after": result.ref_count_after,
}
for result in results
]
async def _storage_verify_async(
settings: Settings,
*,

View File

@@ -23,7 +23,7 @@ from datetime import UTC, datetime
from uuid import UUID
import cbor2
from sqlalchemy import delete, insert, update
from sqlalchemy import delete, insert, select, update
from sqlalchemy.ext.asyncio import AsyncConnection
from artifactstore.db.schema import (
@@ -267,6 +267,47 @@ async def _apply_storage_location_verified(
)
async def _apply_storage_location_deleted(
connection: AsyncConnection,
event: Event,
) -> None:
if event.subject_id is None:
raise ValueError("v1.storage.location_deleted event must have subject_id")
payload = cbor2.loads(event.payload)
await connection.execute(
update(storage_locations)
.where(storage_locations.c.id == UUID(payload["storage_location_id"]))
.values(
status="deleted",
restore_status="object_deleted"
if payload.get("object_deleted")
else "reference_released",
)
)
remaining = (
await connection.execute(
select(storage_locations.c.id)
.join(
artifact_files,
artifact_files.c.id == storage_locations.c.artifact_file_id,
)
.where(
artifact_files.c.package_id == event.subject_id,
storage_locations.c.status != "deleted",
)
.limit(1)
)
).first()
package_values: dict[str, object] = {"last_event_sequence": event.sequence}
if remaining is None:
package_values["status"] = "garbage_collected"
await connection.execute(
update(artifact_packages)
.where(artifact_packages.c.id == event.subject_id)
.values(**package_values)
)
def _parse_iso(value: str | None) -> datetime | None:
if value is None:
return None
@@ -286,4 +327,5 @@ _HANDLERS = {
"v1.retention.hold_released": _apply_retention_hold_released,
"v1.retention.deletion_eligible": _apply_retention_deletion_eligible,
"v1.storage.location_verified": _apply_storage_location_verified,
"v1.storage.location_deleted": _apply_storage_location_deleted,
}

View File

@@ -70,6 +70,7 @@ __all__ = [
"DuplicateRelativePathError",
"FileNotFoundError",
"FileRecord",
"GarbageCollectionRecord",
"IllegalPackageStateError",
"MetadataSchemaRecord",
"PackageNotFoundError",
@@ -183,6 +184,21 @@ class StorageVerificationRecord:
mismatch: str | None
@dataclass(frozen=True, slots=True)
class GarbageCollectionRecord:
"""Result of releasing one storage location during garbage collection."""
storage_location_id: UUID
file_id: UUID
package_id: UUID
backend_id: str
content_address: str
object_delete_attempted: bool
object_deleted: bool
ref_count_before: int
ref_count_after: int
_RETENTION_EVENT_TYPES = (
"v1.retention.default_applied",
"v1.retention.extended",
@@ -785,6 +801,7 @@ class Registry:
stmt = select(storage_locations)
if backend_id is not None:
stmt = stmt.where(storage_locations.c.backend_id == backend_id)
stmt = stmt.where(storage_locations.c.status != "deleted")
async with self._engine.connect() as conn:
rows = (await conn.execute(stmt.order_by(storage_locations.c.id))).all()
@@ -842,6 +859,109 @@ class Registry:
)
return results
async def collect_garbage(
self,
*,
actor: str = "garbage-collector",
) -> list[GarbageCollectionRecord]:
"""Release deletion-eligible storage locations and delete unreferenced bytes."""
stmt = (
select(
artifact_files.c.package_id,
artifact_files.c.id.label("file_id"),
storage_locations.c.id.label("storage_location_id"),
storage_locations.c.backend_id,
storage_locations.c.content_address,
)
.join(
artifact_files,
artifact_files.c.id == storage_locations.c.artifact_file_id,
)
.join(
retention_state,
retention_state.c.package_id == artifact_files.c.package_id,
)
.where(
retention_state.c.eligible_for_deletion.is_(True),
retention_state.c.active_hold_id.is_(None),
storage_locations.c.status != "deleted",
)
.order_by(
artifact_files.c.package_id,
artifact_files.c.relative_path,
storage_locations.c.id,
)
)
async with self._engine.connect() as conn:
rows = (await conn.execute(stmt)).all()
groups: dict[tuple[str, str], list[Any]] = {}
for row in rows:
groups.setdefault((row.backend_id, row.content_address), []).append(row)
results: list[GarbageCollectionRecord] = []
for (backend_id, content_address), group_rows in groups.items():
async with self._engine.connect() as conn:
active_refs = (
await conn.execute(
select(storage_locations.c.id).where(
storage_locations.c.backend_id == backend_id,
storage_locations.c.content_address == content_address,
storage_locations.c.status != "deleted",
)
)
).all()
ref_count_before = len(active_refs)
ref_count_after = max(ref_count_before - len(group_rows), 0)
object_delete_attempted = ref_count_after == 0
object_deleted = False
if object_delete_attempted:
deletion = await self._dataplane.delete_object(
ContentAddress(content_address),
backend_id=backend_id,
)
object_deleted = deletion.deleted
for row in group_rows:
payload = cbor2.dumps(
{
"storage_location_id": str(row.storage_location_id),
"file_id": str(row.file_id),
"package_id": str(row.package_id),
"backend_id": backend_id,
"content_address": content_address,
"object_delete_attempted": object_delete_attempted,
"object_deleted": object_deleted,
"ref_count_before": ref_count_before,
"ref_count_after": ref_count_after,
},
canonical=True,
)
event = make_event(
event_type="v1.storage.location_deleted",
subject_kind="package",
subject_id=row.package_id,
actor=actor,
payload=payload,
)
async with self._engine.begin() as conn:
written = await write(conn, event)
await self._view_writer.apply(conn, written)
results.append(
GarbageCollectionRecord(
storage_location_id=row.storage_location_id,
file_id=row.file_id,
package_id=row.package_id,
backend_id=backend_id,
content_address=content_address,
object_delete_attempted=object_delete_attempted,
object_deleted=object_deleted,
ref_count_before=ref_count_before,
ref_count_after=ref_count_after,
)
)
return results
async def get_manifest_bytes(self, package_id: UUID, *, format: str = "cbor") -> bytes:
"""Return the finalised manifest. ``format`` is ``cbor`` (canonical
CBOR, the wire form) or ``json`` (the JCS projection)."""
@@ -874,6 +994,8 @@ class Registry:
) -> AsyncIterator[bytes]:
"""Return an async byte iterator for the bytes of a stored file."""
record = await self.get_file_metadata(file_id)
if record.storage_status == "deleted":
raise FileNotFoundError(f"file has been garbage collected: {file_id}")
ca = ContentAddress(record.content_address)
return await self._dataplane.serve_object(
ca,