generated from coulomb/repo-seed
WP-0001-T010: manifest model, canonical CBOR codec, JCS projection
Adds the manifest layer per ADR-0003. The canonical wire format is CBOR with deterministic encoding (cbor2 canonical=True: definite-length, shortest-form integers, sorted map keys); JCS (RFC 8785) is the JSON projection.

src/artifactstore/manifest/:
- model.py: frozen dataclasses for Manifest (manifest_version=1, package, files, storage_receipts, retention_summary, provenance) with restricted types (str/int/bool/None/list/dict) so CBOR and JCS round-trip losslessly.
- codec.py: encode (Manifest -> canonical CBOR bytes) and decode (CBOR bytes -> Manifest) via cbor2.
- projection.py: jcs_projection (Manifest -> RFC 8785 canonical JSON) plus cbor_from_jcs for cross-format round-trip verification.
- digest.py: manifest_digest returns the BLAKE3 content address of the manifest's canonical CBOR bytes (ADR-0001).
- __init__.py: re-exports the public surface.

tests/unit/test_manifest.py:
- decode(encode(m)) == m round-trip (hypothesis-parameterised).
- JCS↔CBOR round-trip: encode(decode(cbor_from_jcs(jcs(m)))) == encode(m).
- Byte stability of the canonical CBOR encoder across calls.
- manifest_digest matches independent BLAKE3 over encode(m).
- Decode rejects non-map CBOR.
- JCS projection sorts keys lexicographically.

Deps: jcs added to project requirements; mypy override for the jcs package (no stubs published yet).

Gates: ruff clean, mypy --strict clean on 26 files, 26 tests pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -29,6 +29,7 @@ dependencies = [
|
||||
"alembic >= 1.13",
|
||||
"blake3 >= 0.4",
|
||||
"cbor2 >= 5.6",
|
||||
"jcs >= 0.2",
|
||||
"typer >= 0.12",
|
||||
"structlog >= 24.1",
|
||||
"pydantic >= 2.7",
|
||||
@@ -89,6 +90,10 @@ mypy_path = "src"
|
||||
explicit_package_bases = true
|
||||
namespace_packages = true
|
||||
|
||||
[[tool.mypy.overrides]]
|
||||
module = ["jcs"]
|
||||
ignore_missing_imports = true
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
asyncio_mode = "auto"
|
||||
testpaths = ["tests"]
|
||||
|
||||
@@ -1,5 +1,37 @@
|
||||
"""Package manifest model and canonical-CBOR codec.
|
||||
"""Package manifest model, codec, and projections (ADR-0003).
|
||||
|
||||
Real implementation lands in ARTIFACT-STORE-WP-0001-T010. See ADR-0003 for
|
||||
the canonicalisation pin (RFC 8949 §4.2.2).
|
||||
The canonical wire format is CBOR with deterministic encoding (RFC 8949).
|
||||
JCS (RFC 8785) is the JSON projection used for display and signing-tool
|
||||
interop. A package's external identifier is the BLAKE3 digest over its
|
||||
canonical CBOR bytes (ADR-0001).
|
||||
"""
|
||||
|
||||
from artifactstore.manifest.codec import decode, encode
|
||||
from artifactstore.manifest.digest import manifest_digest
|
||||
from artifactstore.manifest.model import (
|
||||
MANIFEST_VERSION,
|
||||
FileEntry,
|
||||
Manifest,
|
||||
Package,
|
||||
Provenance,
|
||||
RetentionHold,
|
||||
RetentionSummary,
|
||||
StorageReceipt,
|
||||
)
|
||||
from artifactstore.manifest.projection import cbor_from_jcs, jcs_projection
|
||||
|
||||
__all__ = [
|
||||
"MANIFEST_VERSION",
|
||||
"FileEntry",
|
||||
"Manifest",
|
||||
"Package",
|
||||
"Provenance",
|
||||
"RetentionHold",
|
||||
"RetentionSummary",
|
||||
"StorageReceipt",
|
||||
"cbor_from_jcs",
|
||||
"decode",
|
||||
"encode",
|
||||
"jcs_projection",
|
||||
"manifest_digest",
|
||||
]
|
||||
|
||||
63
src/artifactstore/manifest/codec.py
Normal file
63
src/artifactstore/manifest/codec.py
Normal file
@@ -0,0 +1,63 @@
|
||||
"""Canonical CBOR codec for the manifest (ADR-0003).
|
||||
|
||||
Canonicalisation uses cbor2's deterministic encoding (RFC 8949): definite-
|
||||
length encoding, shortest-form integers, sorted map keys. The same input
|
||||
manifest always produces the same byte sequence, which is the property the
|
||||
manifest digest (ADR-0001) and signature flows rely on.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict
|
||||
from typing import Any
|
||||
|
||||
import cbor2
|
||||
|
||||
from artifactstore.manifest.model import (
|
||||
FileEntry,
|
||||
Manifest,
|
||||
Package,
|
||||
Provenance,
|
||||
RetentionHold,
|
||||
RetentionSummary,
|
||||
StorageReceipt,
|
||||
)
|
||||
|
||||
__all__ = ["decode", "encode"]
|
||||
|
||||
|
||||
def encode(manifest: Manifest) -> bytes:
    """Return the canonical CBOR byte string for ``manifest``.

    The dataclass tree is flattened into plain dicts and lists with
    :func:`dataclasses.asdict` and then serialised by ``cbor2`` with
    ``canonical=True``.
    """
    return cbor2.dumps(asdict(manifest), canonical=True)
|
||||
|
||||
|
||||
def decode(data: bytes) -> Manifest:
    """Parse canonical CBOR bytes back into a :class:`Manifest`.

    Args:
        data: CBOR-encoded manifest bytes.

    Returns:
        The reconstructed manifest.

    Raises:
        ValueError: if the top-level CBOR item is not a map, or if the map
            does not have the manifest's structure (a required field is
            missing, an unknown field is present, or a nested section has
            the wrong shape).

    Exceptions raised by ``cbor2.loads`` itself are not caught here and
    propagate unchanged.
    """
    payload = cbor2.loads(data)
    if not isinstance(payload, dict):
        raise ValueError("manifest must decode to a CBOR map")
    try:
        return _from_dict(payload)
    except (KeyError, TypeError) as err:
        # A missing key surfaces as KeyError; a missing/unexpected dataclass
        # field or a non-mapping section surfaces as TypeError. Report both
        # with the same exception type as the non-map case so callers have a
        # single "bad manifest" signal, keeping the original error as cause.
        raise ValueError(f"malformed manifest: {err!r}") from err
|
||||
|
||||
|
||||
def _build_rows(row_type: Any, rows: Any) -> list[Any]:
    """Instantiate ``row_type`` once per mapping in ``rows`` via keyword expansion."""
    return [row_type(**row) for row in rows]


def _from_dict(payload: dict[str, Any]) -> Manifest:
    """Rebuild the manifest dataclass tree from a decoded mapping.

    Sections are read in a fixed order: package, files, storage receipts,
    retention summary, provenance, and finally the manifest version. The
    retention summary is assembled field by field, so keys in that section
    other than the four read here are ignored; every other section is
    expanded with ``**`` and must match its dataclass fields exactly.
    """
    pkg = Package(**payload["package"])
    file_entries = _build_rows(FileEntry, payload["files"])
    storage_receipts = _build_rows(StorageReceipt, payload["storage_receipts"])

    retention_raw = payload["retention_summary"]
    active_holds = _build_rows(RetentionHold, retention_raw["active_holds"])
    summary = RetentionSummary(
        retention_class=retention_raw["retention_class"],
        expires_at=retention_raw["expires_at"],
        active_holds=active_holds,
        last_retention_event_sequence=retention_raw["last_retention_event_sequence"],
    )

    prov = Provenance(**payload["provenance"])

    return Manifest(
        manifest_version=payload["manifest_version"],
        package=pkg,
        files=file_entries,
        storage_receipts=storage_receipts,
        retention_summary=summary,
        provenance=prov,
    )
|
||||
19
src/artifactstore/manifest/digest.py
Normal file
19
src/artifactstore/manifest/digest.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""Manifest digest helper (ADR-0001 + ADR-0003)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from artifactstore.identity import PRIMARY_ALGORITHM, ContentAddress, digest_bytes
|
||||
from artifactstore.manifest.codec import encode
|
||||
from artifactstore.manifest.model import Manifest
|
||||
|
||||
__all__ = ["manifest_digest"]
|
||||
|
||||
|
||||
def manifest_digest(manifest: Manifest) -> ContentAddress:
    """Compute the content address that identifies ``manifest``.

    The manifest is encoded to canonical CBOR and those bytes are hashed
    with ``PRIMARY_ALGORITHM`` (BLAKE3 by default); the content address of
    the primary digest is returned. This value is the package's external
    identifier.
    """
    digests = digest_bytes(encode(manifest), primary=PRIMARY_ALGORITHM)
    return digests.primary.content_address
|
||||
97
src/artifactstore/manifest/model.py
Normal file
97
src/artifactstore/manifest/model.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""Manifest data model (ADR-0003).
|
||||
|
||||
Field types are restricted to CBOR-/JSON-compatible primitives (``str``,
|
||||
``int``, ``bool``, ``None``, ``list``, ``dict``) so the canonical CBOR
|
||||
encoding and the JCS JSON projection round-trip losslessly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
MANIFEST_VERSION = 1
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class FileEntry:
|
||||
"""One stored file in a package."""
|
||||
|
||||
id: str
|
||||
relative_path: str
|
||||
media_type: str
|
||||
size_bytes: int
|
||||
digest_algorithm: str
|
||||
digest_primary_hex: str
|
||||
digest_sha256_hex: str
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class StorageReceipt:
|
||||
"""A record of where a file's bytes are stored."""
|
||||
|
||||
file_id: str
|
||||
backend_id: str
|
||||
content_address: str
|
||||
retrieval_tier: str
|
||||
status: str
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RetentionHold:
|
||||
"""An active hold preventing deletion eligibility."""
|
||||
|
||||
hold_id: str
|
||||
reason: str
|
||||
actor: str
|
||||
applied_at: str
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RetentionSummary:
|
||||
"""Retention state summary as of manifest write time."""
|
||||
|
||||
retention_class: str
|
||||
expires_at: str | None
|
||||
active_holds: list[RetentionHold]
|
||||
last_retention_event_sequence: int | None
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class Package:
|
||||
"""Package-level metadata."""
|
||||
|
||||
id: str
|
||||
name: str
|
||||
producer: str
|
||||
subject: str
|
||||
retention_class: str
|
||||
status: str
|
||||
created_at: str
|
||||
finalized_at: str | None
|
||||
expires_at: str | None
|
||||
metadata: dict[str, Any]
|
||||
metadata_schema_id: str | None
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class Provenance:
|
||||
"""Provenance fields recorded at ingest time."""
|
||||
|
||||
source_commits: dict[str, str]
|
||||
tool_versions: dict[str, str]
|
||||
environment: dict[str, str]
|
||||
ingest_actor: str
|
||||
ingest_timestamps: dict[str, str]
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class Manifest:
|
||||
"""The complete v1 manifest payload."""
|
||||
|
||||
manifest_version: int
|
||||
package: Package
|
||||
files: list[FileEntry]
|
||||
storage_receipts: list[StorageReceipt]
|
||||
retention_summary: RetentionSummary
|
||||
provenance: Provenance
|
||||
35
src/artifactstore/manifest/projection.py
Normal file
35
src/artifactstore/manifest/projection.py
Normal file
@@ -0,0 +1,35 @@
|
||||
"""JCS (RFC 8785) projection of the manifest.
|
||||
|
||||
Used for human display, signing-tool interop, and as the JSON form of the
|
||||
manifest. Round-trips losslessly with the canonical CBOR form as long as
|
||||
manifest values use only JSON-compatible primitive types.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import asdict
|
||||
|
||||
import cbor2
|
||||
import jcs
|
||||
|
||||
from artifactstore.manifest.model import Manifest
|
||||
|
||||
__all__ = ["cbor_from_jcs", "jcs_projection"]
|
||||
|
||||
|
||||
def jcs_projection(manifest: Manifest) -> bytes:
    """Return the RFC 8785 canonical JSON form of ``manifest``.

    The dataclass tree is flattened with :func:`dataclasses.asdict` and
    passed to ``jcs.canonicalize``.
    """
    canonical_json = jcs.canonicalize(asdict(manifest))
    return canonical_json  # type: ignore[no-any-return]
|
||||
|
||||
|
||||
def cbor_from_jcs(jcs_bytes: bytes) -> bytes:
    """Re-encode a JCS JSON document as canonical CBOR.

    The JSON is parsed with :func:`json.loads` and the resulting value is
    serialised by ``cbor2`` with ``canonical=True``. It exists to check the
    JCS↔CBOR round-trip property: feeding it the JCS projection of a
    manifest, then decoding and re-encoding the result, must reproduce the
    manifest's canonical CBOR encoding.
    """
    return cbor2.dumps(json.loads(jcs_bytes), canonical=True)
|
||||
167
tests/unit/test_manifest.py
Normal file
167
tests/unit/test_manifest.py
Normal file
@@ -0,0 +1,167 @@
|
||||
"""Tests for :mod:`artifactstore.manifest` (ARTIFACT-STORE-WP-0001-T010)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import cbor2
|
||||
import pytest
|
||||
from hypothesis import HealthCheck, given, settings
|
||||
from hypothesis import strategies as st
|
||||
|
||||
from artifactstore.identity import PRIMARY_ALGORITHM, digest_bytes
|
||||
from artifactstore.manifest import (
|
||||
MANIFEST_VERSION,
|
||||
FileEntry,
|
||||
Manifest,
|
||||
Package,
|
||||
Provenance,
|
||||
RetentionHold,
|
||||
RetentionSummary,
|
||||
StorageReceipt,
|
||||
cbor_from_jcs,
|
||||
decode,
|
||||
encode,
|
||||
jcs_projection,
|
||||
manifest_digest,
|
||||
)
|
||||
|
||||
|
||||
def _sample_manifest(
    *,
    file_count: int = 1,
    holds: int = 0,
    metadata: dict[str, Any] | None = None,
) -> Manifest:
    """Build a deterministic fixture manifest.

    ``file_count`` file entries are created, each with one matching storage
    receipt; ``holds`` retention holds are attached to the retention
    summary; ``metadata`` becomes the package metadata (an empty dict when
    omitted).
    """
    entries: list[FileEntry] = []
    placements: list[StorageReceipt] = []
    for index in range(file_count):
        entry = FileEntry(
            id=f"file-{index:04d}",
            relative_path=f"reports/r-{index}.json",
            media_type="application/json",
            size_bytes=42 + index,
            digest_algorithm=PRIMARY_ALGORITHM,
            digest_primary_hex="ab" * 32,
            digest_sha256_hex="cd" * 32,
        )
        entries.append(entry)
        placements.append(
            StorageReceipt(
                file_id=entry.id,
                backend_id="local",
                content_address=f"{entry.digest_algorithm}:{entry.digest_primary_hex}",
                retrieval_tier="hot",
                status="recorded",
            )
        )

    active = [
        RetentionHold(
            hold_id=f"hold-{number:04d}",
            reason="audit-prep",
            actor="ops@example.org",
            applied_at="2026-05-15T12:00:00Z",
        )
        for number in range(holds)
    ]

    package_metadata = {} if metadata is None else metadata
    package = Package(
        id="pkg-0001",
        name="guide-board cmis run",
        producer="guide-board",
        subject="kontextual-engine",
        retention_class="raw-evidence",
        status="finalized",
        created_at="2026-05-15T10:00:00Z",
        finalized_at="2026-05-15T11:00:00Z",
        expires_at="2027-05-15T11:00:00Z",
        metadata=package_metadata,
        metadata_schema_id=None,
    )
    retention = RetentionSummary(
        retention_class="raw-evidence",
        expires_at="2027-05-15T11:00:00Z",
        active_holds=active,
        last_retention_event_sequence=None,
    )
    provenance = Provenance(
        source_commits={"guide-board": "abc1234"},
        tool_versions={"guide-board": "0.1.0"},
        environment={"runner": "ci"},
        ingest_actor="codex",
        ingest_timestamps={"received_at": "2026-05-15T10:00:00Z"},
    )

    return Manifest(
        manifest_version=MANIFEST_VERSION,
        package=package,
        files=entries,
        storage_receipts=placements,
        retention_summary=retention,
        provenance=provenance,
    )
|
||||
|
||||
|
||||
def test_cbor_round_trip_simple() -> None:
    """Decoding the encoded bytes of a small manifest yields an equal manifest."""
    original = _sample_manifest(file_count=3, holds=1)
    restored = decode(encode(original))
    assert restored == original
|
||||
|
||||
|
||||
def test_canonical_cbor_is_byte_stable_across_calls() -> None:
    """Canonical CBOR bytes are stable across calls and key insertion order.

    Encoding the same object twice only shows the encoder is repeatable; it
    would pass even if map keys were emitted in insertion order. The second
    manifest carries the same metadata inserted in a different order, so the
    comparison only holds when the encoder really sorts map keys.
    """
    m = _sample_manifest(
        file_count=5,
        holds=0,
        metadata={"zz": "z", "aa": "a", "mm": "m"},
    )
    assert encode(m) == encode(m)

    reordered = _sample_manifest(
        file_count=5,
        holds=0,
        metadata={"aa": "a", "mm": "m", "zz": "z"},
    )
    assert encode(reordered) == encode(m)
|
||||
|
||||
|
||||
def test_jcs_round_trip_via_cbor() -> None:
    """Going manifest -> JCS -> CBOR -> manifest -> CBOR reproduces the direct encoding."""
    manifest = _sample_manifest(
        file_count=2,
        holds=2,
        metadata={"key": "value", "n": 7},
    )
    direct_cbor = encode(manifest)
    via_json = cbor_from_jcs(jcs_projection(manifest))
    round_tripped = encode(decode(via_json))
    assert direct_cbor == round_tripped
|
||||
|
||||
|
||||
def test_manifest_digest_is_blake3_of_canonical_cbor() -> None:
    """``manifest_digest`` equals an independently computed digest of ``encode(m)``."""
    manifest = _sample_manifest()
    address = manifest_digest(manifest)
    independent = digest_bytes(encode(manifest)).primary.content_address
    assert address == independent
    assert str(address).startswith("blake3:")
|
||||
|
||||
|
||||
def test_decode_rejects_non_map_cbor() -> None:
    """A CBOR array at the top level is refused with ``ValueError``."""
    array_bytes = cbor2.dumps([1, 2, 3], canonical=True)
    with pytest.raises(ValueError):
        decode(array_bytes)
|
||||
|
||||
|
||||
def test_jcs_projection_is_canonical_json() -> None:
    """The JCS projection emits the metadata keys in sorted order."""
    m = _sample_manifest(file_count=1, holds=0, metadata={"b": 2, "a": 1})
    text = jcs_projection(m).decode("utf-8")

    pos_a = text.find('"a":1')
    pos_b = text.find('"b":2')
    # ``str.find`` returns -1 when the needle is absent, and -1 sorts before
    # any real offset, so a bare ``pos_a < pos_b`` would pass even if the
    # "a" member were missing. Require both members to be present first.
    assert pos_a != -1
    assert pos_b != -1
    # JCS sorts object keys lexicographically, so "a" must precede "b".
    assert pos_a < pos_b
|
||||
|
||||
|
||||
@settings(suppress_health_check=[HealthCheck.too_slow], max_examples=25)
@given(
    file_count=st.integers(min_value=0, max_value=5),
    hold_count=st.integers(min_value=0, max_value=3),
)
def test_property_cbor_round_trip(file_count: int, hold_count: int) -> None:
    """CBOR encode/decode is lossless for generated file and hold counts."""
    original = _sample_manifest(file_count=file_count, holds=hold_count)
    restored = decode(encode(original))
    assert restored == original
|
||||
|
||||
|
||||
@settings(suppress_health_check=[HealthCheck.too_slow], max_examples=25)
@given(
    file_count=st.integers(min_value=0, max_value=3),
    hold_count=st.integers(min_value=0, max_value=2),
)
def test_property_jcs_to_cbor_round_trip(file_count: int, hold_count: int) -> None:
    """The JCS -> CBOR -> manifest -> CBOR path matches the direct encoding."""
    manifest = _sample_manifest(file_count=file_count, holds=hold_count)
    direct_cbor = encode(manifest)
    via_json = cbor_from_jcs(jcs_projection(manifest))
    round_tripped = encode(decode(via_json))
    assert direct_cbor == round_tripped
|
||||
11
uv.lock
generated
11
uv.lock
generated
@@ -70,6 +70,7 @@ dependencies = [
|
||||
{ name = "blake3" },
|
||||
{ name = "cbor2" },
|
||||
{ name = "fastapi" },
|
||||
{ name = "jcs" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "pydantic-settings" },
|
||||
{ name = "sqlalchemy" },
|
||||
@@ -108,6 +109,7 @@ requires-dist = [
|
||||
{ name = "fastapi", specifier = ">=0.115" },
|
||||
{ name = "httpx", marker = "extra == 'dev'", specifier = ">=0.27" },
|
||||
{ name = "hypothesis", marker = "extra == 'dev'", specifier = ">=6.100" },
|
||||
{ name = "jcs", specifier = ">=0.2" },
|
||||
{ name = "mypy", marker = "extra == 'dev'", specifier = ">=1.10" },
|
||||
{ name = "pydantic", specifier = ">=2.7" },
|
||||
{ name = "pydantic-settings", specifier = ">=2.4" },
|
||||
@@ -505,6 +507,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jcs"
|
||||
version = "0.2.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/47/e5/9d547f0d42ba00f68eec773aa7b2145e0c0335eb632cbaf519f480a429af/jcs-0.2.1.tar.gz", hash = "sha256:9f20360b2f3b0a410d65493b448f96306d80e37fb46283f3f4aa5db2c7c1472b", size = 6886 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c0/d4/9a99bc15266a842bd14a1913afdb05182888ebab035666c1ce8a64537ca2/jcs-0.2.1-py3-none-any.whl", hash = "sha256:e23a3e1de60f832d33cd811bb9c3b3be79219cdf95f63b88f0972732c3fa8476", size = 7603 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "librt"
|
||||
version = "0.11.0"
|
||||
|
||||
@@ -118,7 +118,7 @@ Acceptance:
|
||||
|
||||
```task
|
||||
id: ARTIFACT-STORE-WP-0001-T010
|
||||
status: todo
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "8b45a3d9-aa19-4ae8-afe0-687417bf12d0"
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user