generated from coulomb/repo-seed
Adds the manifest layer per ADR-0003. The canonical wire format is CBOR with deterministic encoding (cbor2 canonical=True: definite-length, shortest-form integers, sorted map keys); JCS (RFC 8785) is the JSON projection. src/artifactstore/manifest/: - model.py: frozen dataclasses for Manifest (manifest_version=1, package, files, storage_receipts, retention_summary, provenance) with restricted types (str/int/bool/None/list/dict) so CBOR and JCS round-trip losslessly. - codec.py: encode (Manifest -> canonical CBOR bytes) and decode (CBOR bytes -> Manifest) via cbor2. - projection.py: jcs_projection (Manifest -> RFC 8785 canonical JSON) plus cbor_from_jcs for cross-format round-trip verification. - digest.py: manifest_digest returns the BLAKE3 content address of the manifest's canonical CBOR bytes (ADR-0001). - __init__.py: re-exports the public surface. tests/unit/test_manifest.py: - decode(encode(m)) == m round-trip (hypothesis-parameterised). - JCS↔CBOR round-trip: encode(decode(cbor_from_jcs(jcs(m)))) == encode(m). - Byte stability of the canonical CBOR encoder across calls. - manifest_digest matches independent BLAKE3 over encode(m). - Decode rejects non-map CBOR. - JCS projection sorts keys lexicographically. Deps: jcs added to project requirements; mypy override for the jcs package (no stubs published yet). Gates: ruff clean, mypy --strict clean on 26 files, 26 tests pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
98 lines
2.1 KiB
Python
98 lines
2.1 KiB
Python
"""Manifest data model (ADR-0003).

Field types are restricted to CBOR-/JSON-compatible primitives (``str``,
``int``, ``bool``, ``None``, ``list``, ``dict``) so the canonical CBOR
encoding and the JCS JSON projection round-trip losslessly.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Any
|
|
|
|
# Version number of the manifest schema modelled in this module (v1).
# NOTE(review): not referenced in the visible part of this module; presumably
# the codec compares it with ``Manifest.manifest_version`` -- confirm.
MANIFEST_VERSION = 1
|
|
|
|
|
|
@dataclass(frozen=True, slots=True)
|
|
class FileEntry:
|
|
"""One stored file in a package."""
|
|
|
|
id: str
|
|
relative_path: str
|
|
media_type: str
|
|
size_bytes: int
|
|
digest_algorithm: str
|
|
digest_primary_hex: str
|
|
digest_sha256_hex: str
|
|
|
|
|
|
@dataclass(frozen=True, slots=True)
|
|
class StorageReceipt:
|
|
"""A record of where a file's bytes are stored."""
|
|
|
|
file_id: str
|
|
backend_id: str
|
|
content_address: str
|
|
retrieval_tier: str
|
|
status: str
|
|
|
|
|
|
@dataclass(frozen=True, slots=True)
|
|
class RetentionHold:
|
|
"""An active hold preventing deletion eligibility."""
|
|
|
|
hold_id: str
|
|
reason: str
|
|
actor: str
|
|
applied_at: str
|
|
|
|
|
|
@dataclass(frozen=True, slots=True)
|
|
class RetentionSummary:
|
|
"""Retention state summary as of manifest write time."""
|
|
|
|
retention_class: str
|
|
expires_at: str | None
|
|
active_holds: list[RetentionHold]
|
|
last_retention_event_sequence: int | None
|
|
|
|
|
|
@dataclass(frozen=True, slots=True)
|
|
class Package:
|
|
"""Package-level metadata."""
|
|
|
|
id: str
|
|
name: str
|
|
producer: str
|
|
subject: str
|
|
retention_class: str
|
|
status: str
|
|
created_at: str
|
|
finalized_at: str | None
|
|
expires_at: str | None
|
|
metadata: dict[str, Any]
|
|
metadata_schema_id: str | None
|
|
|
|
|
|
@dataclass(frozen=True, slots=True)
|
|
class Provenance:
|
|
"""Provenance fields recorded at ingest time."""
|
|
|
|
source_commits: dict[str, str]
|
|
tool_versions: dict[str, str]
|
|
environment: dict[str, str]
|
|
ingest_actor: str
|
|
ingest_timestamps: dict[str, str]
|
|
|
|
|
|
@dataclass(frozen=True, slots=True)
|
|
class Manifest:
|
|
"""The complete v1 manifest payload."""
|
|
|
|
manifest_version: int
|
|
package: Package
|
|
files: list[FileEntry]
|
|
storage_receipts: list[StorageReceipt]
|
|
retention_summary: RetentionSummary
|
|
provenance: Provenance
|