generated from coulomb/repo-seed
WP-0001-T010: manifest model, canonical CBOR codec, JCS projection
Adds the manifest layer per ADR-0003. The canonical wire format is CBOR with deterministic encoding (cbor2 canonical=True: definite-length, shortest-form integers, sorted map keys); JCS (RFC 8785) is the JSON projection.

src/artifactstore/manifest/:
- model.py: frozen dataclasses for Manifest (manifest_version=1, package, files, storage_receipts, retention_summary, provenance) with restricted types (str/int/bool/None/list/dict) so CBOR and JCS round-trip losslessly.
- codec.py: encode (Manifest -> canonical CBOR bytes) and decode (CBOR bytes -> Manifest) via cbor2.
- projection.py: jcs_projection (Manifest -> RFC 8785 canonical JSON) plus cbor_from_jcs for cross-format round-trip verification.
- digest.py: manifest_digest returns the BLAKE3 content address of the manifest's canonical CBOR bytes (ADR-0001).
- __init__.py: re-exports the public surface.

tests/unit/test_manifest.py:
- decode(encode(m)) == m round-trip (hypothesis-parameterised).
- JCS↔CBOR round-trip: encode(decode(cbor_from_jcs(jcs(m)))) == encode(m).
- Byte stability of the canonical CBOR encoder across calls.
- manifest_digest matches independent BLAKE3 over encode(m).
- Decode rejects non-map CBOR.
- JCS projection sorts keys lexicographically.

Deps: jcs added to project requirements; mypy override for the jcs package (no stubs published yet).

Gates: ruff clean, mypy --strict clean on 26 files, 26 tests pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,37 @@
|
||||
"""Package manifest model and canonical-CBOR codec.
|
||||
"""Package manifest model, codec, and projections (ADR-0003).
|
||||
|
||||
Real implementation lands in ARTIFACT-STORE-WP-0001-T010. See ADR-0003 for
|
||||
the canonicalisation pin (RFC 8949 §4.2.2).
|
||||
The canonical wire format is CBOR with deterministic encoding (RFC 8949).
|
||||
JCS (RFC 8785) is the JSON projection used for display and signing-tool
|
||||
interop. A package's external identifier is the BLAKE3 digest over its
|
||||
canonical CBOR bytes (ADR-0001).
|
||||
"""
|
||||
|
||||
from artifactstore.manifest.codec import decode, encode
|
||||
from artifactstore.manifest.digest import manifest_digest
|
||||
from artifactstore.manifest.model import (
|
||||
MANIFEST_VERSION,
|
||||
FileEntry,
|
||||
Manifest,
|
||||
Package,
|
||||
Provenance,
|
||||
RetentionHold,
|
||||
RetentionSummary,
|
||||
StorageReceipt,
|
||||
)
|
||||
from artifactstore.manifest.projection import cbor_from_jcs, jcs_projection
|
||||
|
||||
__all__ = [
|
||||
"MANIFEST_VERSION",
|
||||
"FileEntry",
|
||||
"Manifest",
|
||||
"Package",
|
||||
"Provenance",
|
||||
"RetentionHold",
|
||||
"RetentionSummary",
|
||||
"StorageReceipt",
|
||||
"cbor_from_jcs",
|
||||
"decode",
|
||||
"encode",
|
||||
"jcs_projection",
|
||||
"manifest_digest",
|
||||
]
|
||||
|
||||
63
src/artifactstore/manifest/codec.py
Normal file
63
src/artifactstore/manifest/codec.py
Normal file
@@ -0,0 +1,63 @@
|
||||
"""Canonical CBOR codec for the manifest (ADR-0003).
|
||||
|
||||
Canonicalisation uses cbor2's deterministic encoding (RFC 8949): definite-
|
||||
length encoding, shortest-form integers, sorted map keys. The same input
|
||||
manifest always produces the same byte sequence, which is the property the
|
||||
manifest digest (ADR-0001) and signature flows rely on.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict
|
||||
from typing import Any
|
||||
|
||||
import cbor2
|
||||
|
||||
from artifactstore.manifest.model import (
|
||||
FileEntry,
|
||||
Manifest,
|
||||
Package,
|
||||
Provenance,
|
||||
RetentionHold,
|
||||
RetentionSummary,
|
||||
StorageReceipt,
|
||||
)
|
||||
|
||||
__all__ = ["decode", "encode"]
|
||||
|
||||
|
||||
def encode(manifest: Manifest) -> bytes:
    """Return the canonical CBOR byte string for ``manifest``.

    The dataclass tree is first flattened into plain dicts and lists with
    :func:`dataclasses.asdict`, then dumped by cbor2 with ``canonical=True``
    so that equal manifests always serialise to identical bytes.
    """
    return cbor2.dumps(asdict(manifest), canonical=True)
|
||||
|
||||
|
||||
def decode(data: bytes) -> Manifest:
    """Rebuild a :class:`Manifest` from its CBOR serialisation.

    Raises:
        ValueError: if the top-level CBOR item is anything other than a map.
    """
    decoded = cbor2.loads(data)
    if isinstance(decoded, dict):
        return _from_dict(decoded)
    raise ValueError("manifest must decode to a CBOR map")
|
||||
|
||||
|
||||
def _from_dict(payload: dict[str, Any]) -> Manifest:
    """Turn a decoded top-level map into the typed manifest dataclasses.

    Each section is looked up by subscript, so a missing section or field
    surfaces as ``KeyError``. Nested maps are expanded as keyword arguments
    into their dataclass constructors.
    """
    # Sections are built in the same order as they are listed on Manifest
    # (package, files, receipts, retention, provenance); the version key is
    # read last, when the Manifest itself is assembled.
    pkg = Package(**payload["package"])
    file_entries = [FileEntry(**raw_file) for raw_file in payload["files"]]
    receipt_entries = [
        StorageReceipt(**raw_receipt) for raw_receipt in payload["storage_receipts"]
    ]

    retention_raw = payload["retention_summary"]
    hold_entries = [RetentionHold(**raw_hold) for raw_hold in retention_raw["active_holds"]]
    retention = RetentionSummary(
        active_holds=hold_entries,
        retention_class=retention_raw["retention_class"],
        expires_at=retention_raw["expires_at"],
        last_retention_event_sequence=retention_raw["last_retention_event_sequence"],
    )

    prov = Provenance(**payload["provenance"])

    return Manifest(
        manifest_version=payload["manifest_version"],
        package=pkg,
        files=file_entries,
        storage_receipts=receipt_entries,
        retention_summary=retention,
        provenance=prov,
    )
|
||||
19
src/artifactstore/manifest/digest.py
Normal file
19
src/artifactstore/manifest/digest.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""Manifest digest helper (ADR-0001 + ADR-0003)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from artifactstore.identity import PRIMARY_ALGORITHM, ContentAddress, digest_bytes
|
||||
from artifactstore.manifest.codec import encode
|
||||
from artifactstore.manifest.model import Manifest
|
||||
|
||||
__all__ = ["manifest_digest"]
|
||||
|
||||
|
||||
def manifest_digest(manifest: Manifest) -> ContentAddress:
    """Compute the content address that identifies ``manifest``.

    The manifest is serialised with :func:`encode` (canonical CBOR) and the
    resulting bytes are hashed with ``PRIMARY_ALGORITHM`` (BLAKE3 by
    default). The primary digest's content address is the package's
    external identifier.
    """
    digests = digest_bytes(encode(manifest), primary=PRIMARY_ALGORITHM)
    return digests.primary.content_address
|
||||
97
src/artifactstore/manifest/model.py
Normal file
97
src/artifactstore/manifest/model.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""Manifest data model (ADR-0003).
|
||||
|
||||
Field types are restricted to CBOR-/JSON-compatible primitives (``str``,
|
||||
``int``, ``bool``, ``None``, ``list``, ``dict``) so the canonical CBOR
|
||||
encoding and the JCS JSON projection round-trip losslessly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
MANIFEST_VERSION = 1
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class FileEntry:
|
||||
"""One stored file in a package."""
|
||||
|
||||
id: str
|
||||
relative_path: str
|
||||
media_type: str
|
||||
size_bytes: int
|
||||
digest_algorithm: str
|
||||
digest_primary_hex: str
|
||||
digest_sha256_hex: str
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class StorageReceipt:
|
||||
"""A record of where a file's bytes are stored."""
|
||||
|
||||
file_id: str
|
||||
backend_id: str
|
||||
content_address: str
|
||||
retrieval_tier: str
|
||||
status: str
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RetentionHold:
|
||||
"""An active hold preventing deletion eligibility."""
|
||||
|
||||
hold_id: str
|
||||
reason: str
|
||||
actor: str
|
||||
applied_at: str
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RetentionSummary:
|
||||
"""Retention state summary as of manifest write time."""
|
||||
|
||||
retention_class: str
|
||||
expires_at: str | None
|
||||
active_holds: list[RetentionHold]
|
||||
last_retention_event_sequence: int | None
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class Package:
|
||||
"""Package-level metadata."""
|
||||
|
||||
id: str
|
||||
name: str
|
||||
producer: str
|
||||
subject: str
|
||||
retention_class: str
|
||||
status: str
|
||||
created_at: str
|
||||
finalized_at: str | None
|
||||
expires_at: str | None
|
||||
metadata: dict[str, Any]
|
||||
metadata_schema_id: str | None
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class Provenance:
|
||||
"""Provenance fields recorded at ingest time."""
|
||||
|
||||
source_commits: dict[str, str]
|
||||
tool_versions: dict[str, str]
|
||||
environment: dict[str, str]
|
||||
ingest_actor: str
|
||||
ingest_timestamps: dict[str, str]
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class Manifest:
|
||||
"""The complete v1 manifest payload."""
|
||||
|
||||
manifest_version: int
|
||||
package: Package
|
||||
files: list[FileEntry]
|
||||
storage_receipts: list[StorageReceipt]
|
||||
retention_summary: RetentionSummary
|
||||
provenance: Provenance
|
||||
35
src/artifactstore/manifest/projection.py
Normal file
35
src/artifactstore/manifest/projection.py
Normal file
@@ -0,0 +1,35 @@
|
||||
"""JCS (RFC 8785) projection of the manifest.
|
||||
|
||||
Used for human display, signing-tool interop, and as the JSON form of the
|
||||
manifest. Round-trips losslessly with the canonical CBOR form as long as
|
||||
manifest values use only JSON-compatible primitive types.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import asdict
|
||||
|
||||
import cbor2
|
||||
import jcs
|
||||
|
||||
from artifactstore.manifest.model import Manifest
|
||||
|
||||
__all__ = ["cbor_from_jcs", "jcs_projection"]
|
||||
|
||||
|
||||
def jcs_projection(manifest: Manifest) -> bytes:
    """Render ``manifest`` as RFC 8785 (JCS) canonical JSON bytes.

    The dataclass tree is flattened with :func:`dataclasses.asdict` and the
    resulting plain structure is handed to ``jcs.canonicalize``.
    """
    # The jcs package is untyped; the annotated local pins the result type.
    canonical_json: bytes = jcs.canonicalize(asdict(manifest))
    return canonical_json
|
||||
|
||||
|
||||
def cbor_from_jcs(jcs_bytes: bytes) -> bytes:
    """Convert JCS JSON bytes into canonical CBOR bytes.

    The JSON document is parsed with :func:`json.loads` and the parsed
    structure is dumped by cbor2 with ``canonical=True``. This supports the
    JCS↔CBOR round-trip check: converting a manifest's JCS projection must
    give the same bytes as the canonical CBOR encoding of that manifest
    (after re-decoding).
    """
    return cbor2.dumps(json.loads(jcs_bytes), canonical=True)
|
||||
Reference in New Issue
Block a user