Files
artifact-store/tests/unit/test_manifest.py
tegwick 9cbb9847ed WP-0001-T010: manifest model, canonical CBOR codec, JCS projection
Adds the manifest layer per ADR-0003. The canonical wire format is CBOR with
deterministic encoding (cbor2 canonical=True: definite-length, shortest-form
integers, sorted map keys); JCS (RFC 8785) is the JSON projection.

src/artifactstore/manifest/:
- model.py: frozen dataclasses for Manifest (manifest_version=1, package,
  files, storage_receipts, retention_summary, provenance) with restricted
  types (str/int/bool/None/list/dict) so CBOR and JCS round-trip losslessly.
- codec.py: encode (Manifest -> canonical CBOR bytes) and decode (CBOR bytes
  -> Manifest) via cbor2.
- projection.py: jcs_projection (Manifest -> RFC 8785 canonical JSON) plus
  cbor_from_jcs for cross-format round-trip verification.
- digest.py: manifest_digest returns the BLAKE3 content address of the
  manifest's canonical CBOR bytes (ADR-0001).
- __init__.py: re-exports the public surface.

tests/unit/test_manifest.py:
- decode(encode(m)) == m round-trip (hypothesis-parameterised).
- JCS↔CBOR round-trip: encode(decode(cbor_from_jcs(jcs_projection(m)))) == encode(m).
- Byte stability of the canonical CBOR encoder across calls.
- manifest_digest matches independent BLAKE3 over encode(m).
- Decode rejects non-map CBOR.
- JCS projection sorts keys lexicographically.

Deps: jcs added to project requirements; mypy override for the jcs package
(no stubs published yet).

Gates: ruff clean, mypy --strict clean on 26 files, 26 tests pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-16 01:39:42 +02:00

168 lines
4.9 KiB
Python

"""Tests for :mod:`artifactstore.manifest` (ARTIFACT-STORE-WP-0001-T010)."""
from __future__ import annotations
from typing import Any
import cbor2
import pytest
from hypothesis import HealthCheck, given, settings
from hypothesis import strategies as st
from artifactstore.identity import PRIMARY_ALGORITHM, digest_bytes
from artifactstore.manifest import (
MANIFEST_VERSION,
FileEntry,
Manifest,
Package,
Provenance,
RetentionHold,
RetentionSummary,
StorageReceipt,
cbor_from_jcs,
decode,
encode,
jcs_projection,
manifest_digest,
)
def _sample_manifest(
    *,
    file_count: int = 1,
    holds: int = 0,
    metadata: dict[str, Any] | None = None,
) -> Manifest:
    """Build a fully populated :class:`Manifest` fixture for the tests.

    Args:
        file_count: Number of ``FileEntry`` records to generate. One
            ``StorageReceipt`` is generated for each file entry.
        holds: Number of ``RetentionHold`` records to place in the
            retention summary's ``active_holds``.
        metadata: Package metadata. ``None`` is replaced by an empty dict.

    Returns:
        A manifest in which every field not controlled by the arguments is
        a fixed literal value.
    """
    # Literals shared by several fields are spelled once.
    primary_hex = "ab" * 32
    sha256_hex = "cd" * 32
    retention_class = "raw-evidence"
    expires_at = "2027-05-15T11:00:00Z"

    file_entries = [
        FileEntry(
            id=f"file-{idx:04d}",
            relative_path=f"reports/r-{idx}.json",
            media_type="application/json",
            size_bytes=42 + idx,
            digest_algorithm=PRIMARY_ALGORITHM,
            digest_primary_hex=primary_hex,
            digest_sha256_hex=sha256_hex,
        )
        for idx in range(file_count)
    ]

    # One receipt per file; the content address is "<algorithm>:<hex>".
    receipt_entries = [
        StorageReceipt(
            file_id=entry.id,
            backend_id="local",
            content_address=f"{entry.digest_algorithm}:{entry.digest_primary_hex}",
            retrieval_tier="hot",
            status="recorded",
        )
        for entry in file_entries
    ]

    active_holds = [
        RetentionHold(
            hold_id=f"hold-{idx:04d}",
            reason="audit-prep",
            actor="ops@example.org",
            applied_at="2026-05-15T12:00:00Z",
        )
        for idx in range(holds)
    ]

    package = Package(
        id="pkg-0001",
        name="guide-board cmis run",
        producer="guide-board",
        subject="kontextual-engine",
        retention_class=retention_class,
        status="finalized",
        created_at="2026-05-15T10:00:00Z",
        finalized_at="2026-05-15T11:00:00Z",
        expires_at=expires_at,
        metadata={} if metadata is None else metadata,
        metadata_schema_id=None,
    )
    retention_summary = RetentionSummary(
        retention_class=retention_class,
        expires_at=expires_at,
        active_holds=active_holds,
        last_retention_event_sequence=None,
    )
    provenance = Provenance(
        source_commits={"guide-board": "abc1234"},
        tool_versions={"guide-board": "0.1.0"},
        environment={"runner": "ci"},
        ingest_actor="codex",
        ingest_timestamps={"received_at": "2026-05-15T10:00:00Z"},
    )

    return Manifest(
        manifest_version=MANIFEST_VERSION,
        package=package,
        files=file_entries,
        storage_receipts=receipt_entries,
        retention_summary=retention_summary,
        provenance=provenance,
    )
def test_cbor_round_trip_simple() -> None:
    """Decoding an encoded manifest yields a manifest equal to the input."""
    original = _sample_manifest(file_count=3, holds=1)
    restored = decode(encode(original))
    assert restored == original
def test_canonical_cbor_is_byte_stable_across_calls() -> None:
    """Encoding the same manifest twice yields equal results."""
    # Metadata keys are deliberately supplied out of alphabetical order.
    unordered_metadata = {"zz": "z", "aa": "a", "mm": "m"}
    manifest = _sample_manifest(file_count=5, holds=0, metadata=unordered_metadata)
    first_encoding = encode(manifest)
    second_encoding = encode(manifest)
    assert first_encoding == second_encoding
def test_jcs_round_trip_via_cbor() -> None:
    """A manifest taken through JCS and back re-encodes to the same CBOR.

    Path under test: ``jcs_projection`` -> ``cbor_from_jcs`` -> ``decode``
    -> ``encode``; the result is compared to ``encode`` of the original.
    """
    manifest = _sample_manifest(
        file_count=2,
        holds=2,
        metadata={"key": "value", "n": 7},
    )
    direct_cbor = encode(manifest)
    via_jcs_cbor = encode(decode(cbor_from_jcs(jcs_projection(manifest))))
    assert direct_cbor == via_jcs_cbor
def test_manifest_digest_is_blake3_of_canonical_cbor() -> None:
    """``manifest_digest`` equals the primary content address of ``encode(m)``.

    Also checks that the string form of the address starts with ``blake3:``.
    """
    manifest = _sample_manifest()
    actual_address = manifest_digest(manifest)
    reference_digest = digest_bytes(encode(manifest))
    assert actual_address == reference_digest.primary.content_address
    assert str(actual_address).startswith("blake3:")
def test_decode_rejects_non_map_cbor() -> None:
    """``decode`` raises ``ValueError`` when given a CBOR-encoded list."""
    array_payload = cbor2.dumps([1, 2, 3], canonical=True)
    with pytest.raises(ValueError):
        decode(array_payload)
def test_jcs_projection_is_canonical_json() -> None:
    """The JCS projection emits metadata keys in lexicographic order.

    The metadata dict is supplied as ``{"b": 2, "a": 1}``; in the serialised
    output the ``"a":1`` member must appear before the ``"b":2`` member.
    """
    m = _sample_manifest(file_count=1, holds=0, metadata={"b": 2, "a": 1})
    text = jcs_projection(m).decode("utf-8")

    # Check presence explicitly: comparing str.find() results would pass
    # vacuously if '"a":1' were missing, because find() returns -1 and
    # -1 is less than any real offset.
    assert '"a":1' in text
    assert '"b":2' in text

    # JCS sorts object keys lexicographically, so "a" precedes "b".
    assert text.index('"a":1') < text.index('"b":2')
@settings(suppress_health_check=[HealthCheck.too_slow], max_examples=25)
@given(
    file_count=st.integers(min_value=0, max_value=5),
    hold_count=st.integers(min_value=0, max_value=3),
)
def test_property_cbor_round_trip(file_count: int, hold_count: int) -> None:
    """CBOR round-trip holds for generated file and hold counts.

    Hypothesis draws ``file_count`` from 0..5 and ``hold_count`` from 0..3.
    """
    original = _sample_manifest(file_count=file_count, holds=hold_count)
    restored = decode(encode(original))
    assert restored == original
@settings(suppress_health_check=[HealthCheck.too_slow], max_examples=25)
@given(
    file_count=st.integers(min_value=0, max_value=3),
    hold_count=st.integers(min_value=0, max_value=2),
)
def test_property_jcs_to_cbor_round_trip(file_count: int, hold_count: int) -> None:
    """JCS -> CBOR -> Manifest -> CBOR matches the direct CBOR encoding.

    Hypothesis draws ``file_count`` from 0..3 and ``hold_count`` from 0..2.
    """
    manifest = _sample_manifest(file_count=file_count, holds=hold_count)
    direct_cbor = encode(manifest)
    cbor_via_jcs = cbor_from_jcs(jcs_projection(manifest))
    reencoded_cbor = encode(decode(cbor_via_jcs))
    assert direct_cbor == reencoded_cbor