generated from coulomb/repo-seed
Adds the manifest layer per ADR-0003. The canonical wire format is CBOR with deterministic encoding (cbor2 canonical=True: definite-length, shortest-form integers, sorted map keys); JCS (RFC 8785) is the JSON projection. src/artifactstore/manifest/: - model.py: frozen dataclasses for Manifest (manifest_version=1, package, files, storage_receipts, retention_summary, provenance) with restricted types (str/int/bool/None/list/dict) so CBOR and JCS round-trip losslessly. - codec.py: encode (Manifest -> canonical CBOR bytes) and decode (CBOR bytes -> Manifest) via cbor2. - projection.py: jcs_projection (Manifest -> RFC 8785 canonical JSON) plus cbor_from_jcs for cross-format round-trip verification. - digest.py: manifest_digest returns the BLAKE3 content address of the manifest's canonical CBOR bytes (ADR-0001). - __init__.py: re-exports the public surface. tests/unit/test_manifest.py: - decode(encode(m)) == m round-trip (hypothesis-parameterised). - JCS↔CBOR round-trip: encode(decode(cbor_from_jcs(jcs(m)))) == encode(m). - Byte stability of the canonical CBOR encoder across calls. - manifest_digest matches independent BLAKE3 over encode(m). - Decode rejects non-map CBOR. - JCS projection sorts keys lexicographically. Deps: jcs added to project requirements; mypy override for the jcs package (no stubs published yet). Gates: ruff clean, mypy --strict clean on 26 files, 26 tests pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
168 lines
4.9 KiB
Python
168 lines
4.9 KiB
Python
"""Tests for :mod:`artifactstore.manifest` (ARTIFACT-STORE-WP-0001-T010)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
|
|
import cbor2
|
|
import pytest
|
|
from hypothesis import HealthCheck, given, settings
|
|
from hypothesis import strategies as st
|
|
|
|
from artifactstore.identity import PRIMARY_ALGORITHM, digest_bytes
|
|
from artifactstore.manifest import (
|
|
MANIFEST_VERSION,
|
|
FileEntry,
|
|
Manifest,
|
|
Package,
|
|
Provenance,
|
|
RetentionHold,
|
|
RetentionSummary,
|
|
StorageReceipt,
|
|
cbor_from_jcs,
|
|
decode,
|
|
encode,
|
|
jcs_projection,
|
|
manifest_digest,
|
|
)
|
|
|
|
|
|
def _sample_manifest(
    *,
    file_count: int = 1,
    holds: int = 0,
    metadata: dict[str, Any] | None = None,
) -> Manifest:
    """Build a fully populated ``Manifest`` fixture from fixed sample values.

    Args:
        file_count: Number of ``FileEntry`` records to generate. Each file
            also gets exactly one matching ``StorageReceipt``.
        holds: Number of ``RetentionHold`` records placed in the retention
            summary's ``active_holds``.
        metadata: Package metadata mapping; an empty dict is used when
            ``None`` is passed.

    Returns:
        A ``Manifest`` at ``MANIFEST_VERSION`` whose content depends only on
        the three arguments (all other field values are constants).
    """
    # Values shared between the package and its retention summary.
    retention_class = "raw-evidence"
    expiry = "2027-05-15T11:00:00Z"

    entries = [
        FileEntry(
            id=f"file-{index:04d}",
            relative_path=f"reports/r-{index}.json",
            media_type="application/json",
            size_bytes=42 + index,
            digest_algorithm=PRIMARY_ALGORITHM,
            digest_primary_hex="ab" * 32,
            digest_sha256_hex="cd" * 32,
        )
        for index in range(file_count)
    ]

    # One receipt per file, addressed as "<algorithm>:<primary hex digest>".
    storage_receipts = [
        StorageReceipt(
            file_id=entry.id,
            backend_id="local",
            content_address=f"{entry.digest_algorithm}:{entry.digest_primary_hex}",
            retrieval_tier="hot",
            status="recorded",
        )
        for entry in entries
    ]

    active_holds = [
        RetentionHold(
            hold_id=f"hold-{index:04d}",
            reason="audit-prep",
            actor="ops@example.org",
            applied_at="2026-05-15T12:00:00Z",
        )
        for index in range(holds)
    ]

    package = Package(
        id="pkg-0001",
        name="guide-board cmis run",
        producer="guide-board",
        subject="kontextual-engine",
        retention_class=retention_class,
        status="finalized",
        created_at="2026-05-15T10:00:00Z",
        finalized_at="2026-05-15T11:00:00Z",
        expires_at=expiry,
        metadata={} if metadata is None else metadata,
        metadata_schema_id=None,
    )

    retention_summary = RetentionSummary(
        retention_class=retention_class,
        expires_at=expiry,
        active_holds=active_holds,
        last_retention_event_sequence=None,
    )

    provenance = Provenance(
        source_commits={"guide-board": "abc1234"},
        tool_versions={"guide-board": "0.1.0"},
        environment={"runner": "ci"},
        ingest_actor="codex",
        ingest_timestamps={"received_at": "2026-05-15T10:00:00Z"},
    )

    return Manifest(
        manifest_version=MANIFEST_VERSION,
        package=package,
        files=entries,
        storage_receipts=storage_receipts,
        retention_summary=retention_summary,
        provenance=provenance,
    )
|
|
|
|
|
|
def test_cbor_round_trip_simple() -> None:
    """Decoding an encoded manifest yields an equal manifest."""
    original = _sample_manifest(file_count=3, holds=1)
    restored = decode(encode(original))
    assert restored == original
|
|
|
|
|
|
def test_canonical_cbor_is_byte_stable_across_calls() -> None:
    """Encoding the same manifest twice produces identical bytes."""
    # Metadata keys are deliberately supplied out of alphabetical order.
    unsorted_metadata = {"zz": "z", "aa": "a", "mm": "m"}
    manifest = _sample_manifest(file_count=5, holds=0, metadata=unsorted_metadata)

    first_pass = encode(manifest)
    second_pass = encode(manifest)
    assert first_pass == second_pass
|
|
|
|
|
|
def test_jcs_round_trip_via_cbor() -> None:
    """Manifest -> JCS -> CBOR -> Manifest -> CBOR matches the direct encoding."""
    manifest = _sample_manifest(
        file_count=2,
        holds=2,
        metadata={"key": "value", "n": 7},
    )
    direct = encode(manifest)
    # Go through the JSON projection and back before re-encoding.
    via_jcs = encode(decode(cbor_from_jcs(jcs_projection(manifest))))
    assert direct == via_jcs
|
|
|
|
|
|
def test_manifest_digest_is_blake3_of_canonical_cbor() -> None:
    """``manifest_digest`` equals the primary content address of ``encode(m)``."""
    manifest = _sample_manifest()
    actual = manifest_digest(manifest)
    # Independently digest the encoded bytes via the identity module.
    reference = digest_bytes(encode(manifest)).primary.content_address
    assert actual == reference
    assert str(actual).startswith("blake3:")
|
|
|
|
|
|
def test_decode_rejects_non_map_cbor() -> None:
    """``decode`` raises ``ValueError`` when the top-level CBOR item is an array."""
    array_payload = cbor2.dumps([1, 2, 3], canonical=True)
    with pytest.raises(ValueError):
        decode(array_payload)
|
|
|
|
|
|
def test_jcs_projection_is_canonical_json() -> None:
    """The JCS projection serialises metadata keys in alphabetical order.

    Metadata is supplied as ``{"b": 2, "a": 1}``; the test checks that
    ``"a":1`` appears before ``"b":2`` in the UTF-8 decoded projection.
    """
    m = _sample_manifest(file_count=1, holds=0, metadata={"b": 2, "a": 1})
    text = jcs_projection(m).decode("utf-8")

    a_pos = text.find('"a":1')
    b_pos = text.find('"b":2')

    # str.find returns -1 for a missing substring, so a bare
    # ``a_pos < b_pos`` would pass vacuously if "a" were absent from the
    # output. Require both keys to be present before comparing positions.
    assert a_pos != -1, 'expected "a":1 in the JCS projection'
    assert b_pos != -1, 'expected "b":2 in the JCS projection'
    assert a_pos < b_pos
|
|
|
|
|
|
@settings(suppress_health_check=[HealthCheck.too_slow], max_examples=25)
@given(
    file_count=st.integers(min_value=0, max_value=5),
    hold_count=st.integers(min_value=0, max_value=3),
)
def test_property_cbor_round_trip(file_count: int, hold_count: int) -> None:
    """CBOR round-trip holds for 0-5 files and 0-3 holds."""
    original = _sample_manifest(file_count=file_count, holds=hold_count)
    restored = decode(encode(original))
    assert restored == original
|
|
|
|
|
|
@settings(suppress_health_check=[HealthCheck.too_slow], max_examples=25)
@given(
    file_count=st.integers(min_value=0, max_value=3),
    hold_count=st.integers(min_value=0, max_value=2),
)
def test_property_jcs_to_cbor_round_trip(file_count: int, hold_count: int) -> None:
    """JCS -> CBOR -> Manifest -> CBOR matches direct encoding for 0-3 files, 0-2 holds."""
    manifest = _sample_manifest(file_count=file_count, holds=hold_count)
    direct = encode(manifest)

    # Project to JCS, convert back to CBOR, decode, then re-encode.
    projected = jcs_projection(manifest)
    from_jcs = cbor_from_jcs(projected)
    re_encoded = encode(decode(from_jcs))

    assert direct == re_encoded
|