Add S3 backend and storage verification

This commit is contained in:
2026-05-16 23:26:03 +02:00
parent b7ceaf7682
commit 864f7f203c
18 changed files with 1085 additions and 40 deletions

View File

@@ -0,0 +1,39 @@
"""Storage configuration parsing tests."""
from __future__ import annotations
from pathlib import Path
import pytest
from artifactstore.config import parse_backend_routes, resolve_secret_ref
def test_parse_backend_routes_supports_wildcards() -> None:
    """A specific route and a ``*:*`` catch-all are both parsed and matchable."""
    routes = parse_backend_routes("guide-board:release-evidence=s3,*:*=local")
    assert len(routes) == 2

    specific = routes[0]
    fallback = routes[1]

    # The specific route matches only its exact producer/retention pair.
    assert specific.matches(producer="guide-board", retention_class="release-evidence")
    assert not specific.matches(producer="guide-board", retention_class="raw-evidence")
    # The wildcard route matches an unrelated producer/retention pair.
    assert fallback.matches(producer="anything", retention_class="raw-evidence")
    assert specific.backend_id == "s3"
def test_parse_backend_routes_rejects_invalid_entry() -> None:
    """An entry written as ``guide-board=s3`` is rejected with ``ValueError``."""
    malformed_spec = "guide-board=s3"
    with pytest.raises(ValueError):
        parse_backend_routes(malformed_spec)
def test_resolve_secret_ref_from_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """An ``env:NAME`` reference resolves to that environment variable's value."""
    expected = "secret"
    monkeypatch.setenv("ARTIFACTSTORE_TEST_SECRET", expected)
    resolved = resolve_secret_ref("env:ARTIFACTSTORE_TEST_SECRET")
    assert resolved == expected
def test_resolve_secret_ref_from_file(tmp_path: Path) -> None:
    """A ``file:PATH`` reference yields the file content without the trailing newline."""
    secret_path = tmp_path / "secret"
    secret_path.write_text("secret\n", encoding="utf-8")
    resolved = resolve_secret_ref(f"file:{secret_path}")
    assert resolved == "secret"
def test_resolve_secret_ref_rejects_literal() -> None:
    """A bare value with no ``env:``/``file:`` style prefix raises ``ValueError``."""
    bare_value = "literal-secret"
    with pytest.raises(ValueError):
        resolve_secret_ref(bare_value)

View File

@@ -158,6 +158,24 @@ async def test_ingest_hints_override_primary_algorithm(
assert result.primary_digest.hex == result.sha256_digest.hex
async def test_ingest_hints_route_to_named_backend(tmp_path: Path) -> None:
    """An ``IngestHints.backend_id`` of "archive" stores the object there, not in the default."""
    backends = {
        "local": LocalBackend(tmp_path / "local", backend_id="local"),
        "archive": LocalBackend(tmp_path / "archive", backend_id="archive"),
    }
    dp = InProcessDataPlane(backends, default_backend_id="local")

    routing_hints = IngestHints(backend_id="archive")
    result = await dp.ingest_stream(_stream(b"route-me"), hints=routing_hints)

    receipt = result.receipt
    assert receipt.backend_id == "archive"
    # The object must exist only under the archive root, never the default one.
    assert not (backends["local"].root / receipt.object_key).exists()
    assert (backends["archive"].root / receipt.object_key).exists()
async def test_serve_missing_object_propagates_object_not_found(
dataplane: InProcessDataPlane,
) -> None:

View File

@@ -0,0 +1,196 @@
"""S3-compatible backend tests (ARTIFACT-STORE-WP-0004)."""
from __future__ import annotations
from collections.abc import AsyncIterator
from typing import Any
import pytest
from artifactstore.identity import ContentAddress, digest_bytes
from artifactstore.storage import ObjectNotFoundError, S3Backend, S3BackendConfig
async def _stream(data: bytes, chunk_size: int = 4) -> AsyncIterator[bytes]:
    """Yield ``data`` as consecutive slices of at most ``chunk_size`` bytes.

    Empty ``data`` yields nothing.

    Raises:
        ValueError: if ``chunk_size`` is smaller than 1. Without this check a
            negative size would silently yield no chunks at all and zero would
            surface as an unrelated ``range()`` error.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")
    for start in range(0, len(data), chunk_size):
        yield data[start : start + chunk_size]
async def _consume(stream: AsyncIterator[bytes]) -> bytes:
    """Drain ``stream`` and return all of its chunks concatenated in order."""
    pieces = [piece async for piece in stream]
    return b"".join(pieces)
def _ca(data: bytes) -> ContentAddress:
    """Return the primary content address that ``digest_bytes`` computes for ``data``."""
    digests = digest_bytes(data)
    return digests.primary.content_address
class FakeNotFoundError(Exception):
    """Error raised by the fake client when a requested key is absent.

    Carries a ``response`` mapping whose ``Error.Code`` is ``NoSuchKey``.
    """

    def __init__(self) -> None:
        Exception.__init__(self, "not found")
        error_payload = {"Code": "NoSuchKey"}
        self.response = {"Error": error_payload}
class FakeBody:
def __init__(self, data: bytes) -> None:
self._data = data
self._offset = 0
async def read(self, size: int) -> bytes:
if self._offset >= len(self._data):
return b""
chunk = self._data[self._offset : self._offset + size]
self._offset += len(chunk)
return chunk
class FakeS3Client:
    """In-memory fake of the async S3 client methods used by these tests.

    Objects are stored in ``objects`` keyed by the ``Key`` argument alone; any
    ``Bucket`` argument is recorded but not used for lookup. Every call is
    appended to ``calls`` as ``(method_name, kwargs)`` so tests can assert on
    the request sequence and parameters. The instance acts as its own async
    context manager.
    """

    def __init__(self) -> None:
        self.objects: dict[str, bytes] = {}
        self.calls: list[tuple[str, dict[str, Any]]] = []
        # upload id -> list of (part number, part payload), in arrival order
        self.uploads: dict[str, list[tuple[int, bytes]]] = {}

    async def __aenter__(self) -> FakeS3Client:
        return self

    async def __aexit__(self, *_exc: object) -> None:
        return None

    @staticmethod
    def _slice_range(data: bytes, range_header: object) -> bytes:
        """Apply a single ``bytes=`` range header to ``data``.

        Supports ``bytes=START-END`` (inclusive end), the open-ended
        ``bytes=START-`` and the suffix form ``bytes=-COUNT``.

        Raises:
            ValueError: if the header has no ``-`` separator or a bound is
                not an integer.
        """
        spec = str(range_header).removeprefix("bytes=")
        first, separator, last = spec.partition("-")
        if not separator:
            raise ValueError(f"malformed Range header: {range_header!r}")
        if not first:
            # Suffix range: the final COUNT bytes. ``data[-0:]`` would return
            # everything, so a zero count is handled explicitly.
            count = int(last)
            return data[-count:] if count else b""
        start = int(first)
        if not last:
            return data[start:]
        return data[start : int(last) + 1]

    async def put_object(self, **kwargs: Any) -> None:
        """Record the call and store ``Body`` under ``Key``."""
        self.calls.append(("put_object", kwargs))
        self.objects[kwargs["Key"]] = kwargs["Body"]

    async def create_multipart_upload(self, **kwargs: Any) -> dict[str, str]:
        """Record the call and open a new upload with a sequential id."""
        self.calls.append(("create_multipart_upload", kwargs))
        upload_id = f"upload-{len(self.uploads) + 1}"
        self.uploads[upload_id] = []
        return {"UploadId": upload_id}

    async def upload_part(self, **kwargs: Any) -> dict[str, str]:
        """Record the call, keep the part, and return a synthetic ``ETag``."""
        self.calls.append(("upload_part", kwargs))
        self.uploads[kwargs["UploadId"]].append((kwargs["PartNumber"], kwargs["Body"]))
        return {"ETag": f"etag-{kwargs['PartNumber']}"}

    async def complete_multipart_upload(self, **kwargs: Any) -> None:
        """Record the call and store the parts joined in part-number order."""
        self.calls.append(("complete_multipart_upload", kwargs))
        parts = self.uploads[kwargs["UploadId"]]
        self.objects[kwargs["Key"]] = b"".join(part for _num, part in sorted(parts))

    async def abort_multipart_upload(self, **kwargs: Any) -> None:
        """Record the call; uploaded parts are left untouched."""
        self.calls.append(("abort_multipart_upload", kwargs))

    async def get_object(self, **kwargs: Any) -> dict[str, FakeBody]:
        """Record the call and return the object, honouring an optional ``Range``.

        Raises:
            FakeNotFoundError: if no object is stored under ``Key``.
        """
        self.calls.append(("get_object", kwargs))
        try:
            data = self.objects[kwargs["Key"]]
        except KeyError as exc:
            raise FakeNotFoundError from exc
        range_header = kwargs.get("Range")
        if range_header:
            data = self._slice_range(data, range_header)
        return {"Body": FakeBody(data)}

    async def head_object(self, **kwargs: Any) -> dict[str, int]:
        """Record the call and return the stored object's ``ContentLength``.

        Raises:
            FakeNotFoundError: if no object is stored under ``Key``.
        """
        self.calls.append(("head_object", kwargs))
        try:
            data = self.objects[kwargs["Key"]]
        except KeyError as exc:
            raise FakeNotFoundError from exc
        return {"ContentLength": len(data)}

    async def delete_object(self, **kwargs: Any) -> None:
        """Record the call and drop the object; a missing key is not an error."""
        self.calls.append(("delete_object", kwargs))
        self.objects.pop(kwargs["Key"], None)

    async def head_bucket(self, **kwargs: Any) -> None:
        """Record the call; always succeeds."""
        self.calls.append(("head_bucket", kwargs))
@pytest.fixture
def fake_client() -> FakeS3Client:
    """Provide a newly constructed, empty ``FakeS3Client``."""
    client = FakeS3Client()
    return client
@pytest.fixture
def backend(fake_client: FakeS3Client) -> S3Backend:
    """Build an ``S3Backend`` wired to ``fake_client``.

    The multipart threshold (8 bytes) and part size (5 bytes) are tiny so
    that short test payloads can exercise the multipart path.
    """
    config = S3BackendConfig(
        endpoint_url="http://minio.test",
        region="us-east-1",
        bucket="artifacts",
        key_prefix="artifact-store",
        storage_class="STANDARD",
        sse="AES256",
        multipart_threshold_bytes=8,
        multipart_chunk_bytes=5,
    )

    def _client_factory() -> FakeS3Client:
        # Always hand back the shared fake so tests can inspect its calls.
        return fake_client

    return S3Backend(config, client_factory=_client_factory, chunk_size=3)
async def test_put_get_head_delete_round_trip(
    backend: S3Backend,
    fake_client: FakeS3Client,
) -> None:
    """A small object can be put, inspected, read back and deleted."""
    data = b"abc"
    ca = _ca(data)

    receipt = await backend.put(ca, _stream(data), size_hint=len(data))

    # The key is the configured prefix followed by a sharded digest path.
    digest = ca.to_digest()
    expected_key = (
        f"artifact-store/{digest.algorithm}/{digest.hex[:2]}/{digest.hex[2:4]}/{digest.hex}"
    )
    assert receipt.object_key == expected_key

    # A payload below the multipart threshold goes through a single put_object.
    first_op, first_params = fake_client.calls[0]
    assert first_op == "put_object"
    assert first_params["StorageClass"] == "STANDARD"
    assert first_params["ServerSideEncryption"] == "AES256"

    meta = await backend.head(ca)
    assert meta.size_bytes == len(data)

    body_stream = await backend.get(ca)
    assert await _consume(body_stream) == data

    await backend.delete(ca)
    with pytest.raises(ObjectNotFoundError):
        await backend.head(ca)
async def test_get_supports_range(backend: S3Backend, fake_client: FakeS3Client) -> None:
    """``byte_range=(2, 5)`` is sent as ``bytes=2-5`` and returns those four bytes."""
    data = b"0123456789"
    ca = _ca(data)
    await backend.put(ca, _stream(data), size_hint=len(data))

    ranged_stream = await backend.get(ca, byte_range=(2, 5))
    received = await _consume(ranged_stream)

    assert received == b"2345"
    _last_op, last_params = fake_client.calls[-1]
    assert last_params["Range"] == "bytes=2-5"
async def test_put_uses_multipart_above_threshold(
    backend: S3Backend,
    fake_client: FakeS3Client,
) -> None:
    """A 12-byte payload is uploaded as a three-part multipart upload."""
    data = b"abcdefghijkl"
    ca = _ca(data)

    receipt = await backend.put(ca, _stream(data), size_hint=len(data))
    assert receipt.size_bytes == len(data)

    expected_ops = ["create_multipart_upload"]
    expected_ops += ["upload_part"] * 3
    expected_ops.append("complete_multipart_upload")
    observed_ops = [op for op, _params in fake_client.calls]
    assert observed_ops == expected_ops

    body_stream = await backend.get(ca)
    assert await _consume(body_stream) == data
async def test_health_uses_head_bucket(
    backend: S3Backend,
    fake_client: FakeS3Client,
) -> None:
    """``health()`` reports a healthy "s3" backend and probes via ``head_bucket``.

    ``fake_client`` is the same instance the ``backend`` fixture was built
    with, so its recorded calls show which client method the probe used.
    """
    status = await backend.health()
    assert status.healthy is True
    assert status.backend_id == "s3"
    # Pin the behaviour the test name promises: the probe went through head_bucket.
    assert "head_bucket" in [name for name, _kwargs in fake_client.calls]