generated from coulomb/repo-seed
src/artifactstore/identity/__init__.py: - Digest: frozen, hashable dataclass (algorithm + lowercase hex), validated. - ContentAddress: canonical `<algorithm>:<hex>` string form with validating parser (to_digest) and emitter (str / from_digest). - DigestPair: dual-digest result (primary + sha256) from a single hashing pass. - Algorithm registry: register_algorithm / get_algorithm / list_algorithms with name validation `[a-z][a-z0-9_-]*`. - digest_bytes (sync) and digest_stream (async) — single-pass dual hashing. - BLAKE3 registered as PRIMARY_ALGORITHM, SHA-256 as INTEROP_ALGORITHM at module import. tests/unit/test_identity.py: - Hypothesis property test asserts digest_bytes matches hashlib.sha256 and blake3.blake3 for random byte sequences up to 4 KiB. - digest_stream invariants: equivalence with digest_bytes under chunked input; defaults to BLAKE3 primary; always computes SHA-256; handles empty input. - Digest / ContentAddress invariants: rejects uppercase hex, empty fields, odd hex length, missing separator; frozen and hashable. Gates: ruff clean, mypy --strict clean on 21 source files, 18 tests pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
126 lines
3.8 KiB
Python
126 lines
3.8 KiB
Python
"""Tests for :mod:`artifactstore.identity` (ARTIFACT-STORE-WP-0001-T009)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
from collections.abc import AsyncIterator
|
|
from dataclasses import FrozenInstanceError
|
|
|
|
import blake3 as _blake3
|
|
import pytest
|
|
from hypothesis import given
|
|
from hypothesis import strategies as st
|
|
|
|
from artifactstore.identity import (
|
|
INTEROP_ALGORITHM,
|
|
PRIMARY_ALGORITHM,
|
|
ContentAddress,
|
|
Digest,
|
|
digest_bytes,
|
|
digest_stream,
|
|
get_algorithm,
|
|
list_algorithms,
|
|
register_algorithm,
|
|
)
|
|
|
|
|
|
def test_registry_has_blake3_and_sha256() -> None:
    """The registry listing contains both the primary and interop algorithms."""
    registered = list_algorithms()
    for expected in (PRIMARY_ALGORITHM, INTEROP_ALGORITHM):
        assert expected in registered
|
|
|
|
|
|
def test_get_algorithm_unknown_raises() -> None:
    """Looking up an unregistered algorithm name raises ``KeyError``."""
    unknown_name = "does-not-exist"
    with pytest.raises(KeyError):
        get_algorithm(unknown_name)
|
|
|
|
|
|
def test_register_algorithm_rejects_bad_names() -> None:
    """Registering under an invalid name is refused with ``ValueError``."""
    invalid_names = (
        "UPPER",  # uppercase
        "1bad",  # leading digit
    )
    for name in invalid_names:
        with pytest.raises(ValueError):
            register_algorithm(name, hashlib.sha256)
|
|
|
|
|
|
def test_digest_rejects_uppercase_hex() -> None:
    """Constructing a ``Digest`` with uppercase hex raises ``ValueError``."""
    uppercase_hex = "DEADBEEF"
    with pytest.raises(ValueError):
        Digest(algorithm="sha256", hex=uppercase_hex)
|
|
|
|
|
|
def test_digest_rejects_empty_fields() -> None:
    """An empty algorithm or an empty hex string raises ``ValueError``."""
    blank_cases = (
        ("", "ab"),  # empty algorithm
        ("sha256", ""),  # empty hex
    )
    for algorithm, hex_value in blank_cases:
        with pytest.raises(ValueError):
            Digest(algorithm=algorithm, hex=hex_value)
|
|
|
|
|
|
def test_digest_rejects_odd_length_hex() -> None:
    """A hex string with an odd number of characters raises ``ValueError``."""
    odd_hex = "abc"  # three nibbles: cannot describe whole bytes
    with pytest.raises(ValueError):
        Digest(algorithm="sha256", hex=odd_hex)
|
|
|
|
|
|
def test_digest_is_hashable() -> None:
    """Two digests built from the same fields hash and compare equal."""
    hex_value = "ab" * 32
    first = Digest(algorithm="sha256", hex=hex_value)
    second = Digest(algorithm="sha256", hex=hex_value)

    assert hash(first) == hash(second)
    assert first == second
    # Equal digests must collapse to one entry when used as set / dict keys.
    assert {first, second} == {first}
|
|
|
|
|
|
def test_digest_is_frozen() -> None:
    """Assigning to a ``Digest`` field raises ``FrozenInstanceError``."""
    digest = Digest(algorithm="sha256", hex="ab" * 32)
    with pytest.raises(FrozenInstanceError):
        # The assignment is deliberately invalid; silence the type checker.
        digest.algorithm = "blake3"  # type: ignore[misc]
|
|
|
|
|
|
def test_content_address_round_trips() -> None:
    """Digest -> ContentAddress -> Digest preserves the value in both directions."""
    hex_value = "ab" * 32
    digest = Digest(algorithm="sha256", hex=hex_value)
    address = digest.content_address

    # Canonical string form is ``<algorithm>:<hex>``.
    assert str(address) == f"sha256:{hex_value}"
    assert address.to_digest() == digest
    assert ContentAddress.from_digest(digest) == address
|
|
|
|
|
|
def test_content_address_rejects_malformed() -> None:
    """Malformed address strings are rejected with ``ValueError``."""
    malformed = (
        "not-a-digest",  # no ``:`` separator
        "sha256:DEADBEEF",  # uppercase hex
        ":abcd",  # empty algorithm
    )
    for text in malformed:
        with pytest.raises(ValueError):
            ContentAddress(text)
|
|
|
|
|
|
@given(st.binary(max_size=4096))
def test_digest_bytes_matches_reference_libs(data: bytes) -> None:
    """``digest_bytes`` agrees with ``hashlib`` and ``blake3`` on arbitrary input."""
    # Reference digests computed directly with the underlying libraries.
    expected_sha256 = hashlib.sha256(data).hexdigest()
    expected_blake3 = _blake3.blake3(data).hexdigest()

    result = digest_bytes(data)

    assert result.primary.algorithm == PRIMARY_ALGORITHM
    assert result.sha256.algorithm == INTEROP_ALGORITHM
    assert result.sha256.hex == expected_sha256
    assert result.primary.hex == expected_blake3
|
|
|
|
|
|
async def _chunked(data: bytes, chunk: int) -> AsyncIterator[bytes]:
    """Yield *data* as consecutive slices of at most *chunk* bytes.

    Test helper that turns an in-memory buffer into an async byte stream.
    Empty *data* produces no chunks at all.

    Args:
        data: The bytes to split.
        chunk: Maximum size of each yielded slice; must be positive.

    Raises:
        ValueError: If *chunk* is zero or negative. Because this is an async
            generator, the error surfaces on the first iteration step rather
            than at call time.
    """
    # A negative step makes ``range`` empty, which would silently turn any
    # payload into an empty stream; fail loudly instead.
    if chunk <= 0:
        raise ValueError(f"chunk must be a positive integer, got {chunk!r}")
    for start in range(0, len(data), chunk):
        yield data[start : start + chunk]
|
|
|
|
|
|
async def test_digest_stream_matches_digest_bytes() -> None:
    """Streaming a payload in chunks gives the same pair as hashing it whole."""
    payload = b"the quick brown fox jumps over the lazy dog" * 100
    expected = digest_bytes(payload)

    # 17 does not divide the payload length, so the final chunk is short.
    streamed = await digest_stream(_chunked(payload, 17))

    assert streamed == expected
|
|
|
|
|
|
async def test_digest_stream_defaults_blake3_and_always_computes_sha256() -> None:
    """With no explicit algorithm, the stream yields primary and SHA-256 digests."""
    message = b"hello"
    expected_sha256 = hashlib.sha256(message).hexdigest()
    expected_blake3 = _blake3.blake3(message).hexdigest()

    result = await digest_stream(_chunked(message, 2))

    assert result.primary.algorithm == PRIMARY_ALGORITHM
    assert result.sha256.algorithm == INTEROP_ALGORITHM
    assert result.sha256.hex == expected_sha256
    assert result.primary.hex == expected_blake3
|
|
|
|
|
|
async def test_digest_stream_handles_empty_input() -> None:
    """A stream that yields no chunks hashes like the empty byte string."""
    nothing = b""
    expected_sha256 = hashlib.sha256(nothing).hexdigest()
    expected_blake3 = _blake3.blake3(nothing).hexdigest()

    result = await digest_stream(_chunked(nothing, 1))

    assert result.sha256.hex == expected_sha256
    assert result.primary.hex == expected_blake3
|