generated from coulomb/repo-seed
Two of yesterday's archives silently dropped infospace content: the default include set was missing contracts/, so wealth-vsm-generation-pilot (16 files) and wealth-vsm-legacy-slice (12 files) were preserved as 14 and 10 files respectively. Fix the include set and make silent drops visible.
- DEFAULT_INCLUDE now: infospace.yaml, artifacts, contracts, schemas, workflows, output, reports, exports
- ArchiveRecord gains skipped_top_level: top-level entries present in the live root that are not in the include set, not excluded, and not auto-hidden (hidden dotfiles, empty dirs, .store/index.yaml). Surfaces in index.yaml only when non-empty.
- Re-archived the two affected pilots with correct counts. Prior records remain in each index.yaml as history.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
236 lines
7.6 KiB
Python
236 lines
7.6 KiB
Python
from __future__ import annotations
|
|
|
|
import filecmp
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
import yaml
|
|
|
|
from infospace_bench import (
|
|
ArchiveRecord,
|
|
InfospaceError,
|
|
RestoredArchive,
|
|
add_artifact,
|
|
annotate_retention,
|
|
archive_infospace,
|
|
create_infospace,
|
|
list_archives,
|
|
restore_archive,
|
|
)
|
|
from infospace_bench.archive import (
|
|
ARCHIVE_INDEX_PATH,
|
|
ARCHIVE_STORE_DIR,
|
|
DEFAULT_RETENTION_CLASS,
|
|
PRODUCER,
|
|
)
|
|
|
|
|
|
def _restored_paths_via_round_trip(
    record: ArchiveRecord, source: Path, tmp_path: Path
) -> list[str]:
    """Restore ``record`` into a scratch directory and list what came back.

    The restore target is ``tmp_path / "restore-<id>"`` where ``<id>`` is the
    first eight characters of the record's package id, so several records can
    be restored under the same ``tmp_path`` without colliding.

    Args:
        record: The archive record whose package should be restored.
        source: Infospace root passed to ``restore_archive`` as
            ``source_infospace``.
        tmp_path: Parent directory for the restore target.

    Returns:
        The ``restored_paths`` reported by ``restore_archive``, as a list.
    """
    short_id = record.package_id[:8]
    restored = restore_archive(
        record.package_id,
        target=tmp_path / f"restore-{short_id}",
        source_infospace=source,
    )
    return [*restored.restored_paths]
|
|
|
|
|
|
def _seed_infospace(workspace: Path, slug: str = "demo") -> Path:
    """Create a small infospace with one source artifact and one report.

    Args:
        workspace: Directory in which the infospace is created.
        slug: Infospace slug; also the directory name under ``infospaces/``.

    Returns:
        The infospace root, ``workspace / "infospaces" / slug``.
    """
    create_infospace(workspace, slug, name="Demo", topic_domain="Test")
    infospace_root = workspace / "infospaces" / slug

    # Register a throwaway markdown file as the single source artifact.
    source_file = workspace / "source.md"
    source_file.write_text("# source\n", encoding="utf-8")
    add_artifact(infospace_root, source_file, kind="source", title="Source One")

    # NOTE(review): relies on reports/ already existing after create_infospace.
    summary_file = infospace_root / "reports" / "summary.md"
    summary_file.write_text("# summary\n", encoding="utf-8")
    return infospace_root
|
|
|
|
|
|
def test_archive_infospace_writes_index_and_finalizes_package(tmp_path: Path) -> None:
    """Archiving returns a populated record, writes the index, and fills the store."""
    root = _seed_infospace(tmp_path)

    record = archive_infospace(root, note="first archive")

    # The returned record carries identity, digest and provenance fields.
    assert isinstance(record, ArchiveRecord)
    assert record.package_id
    assert record.manifest_digest.startswith("blake3:")
    field_expectations = [
        (record.retention_class, DEFAULT_RETENTION_CLASS),
        (record.producer, PRODUCER),
        (record.subject, "demo"),
        (record.note, "first archive"),
    ]
    for actual, expected in field_expectations:
        assert actual == expected
    # At least: infospace.yaml, index.yaml, source.md, summary.md
    assert record.file_count >= 4

    # The index file lists exactly the archive that was just created.
    index_file = root / ARCHIVE_INDEX_PATH
    assert index_file.is_file()
    index_text = index_file.read_text(encoding="utf-8")
    index_doc = yaml.safe_load(index_text)
    assert isinstance(index_doc, dict)
    entries = index_doc["archives"]
    assert len(entries) == 1
    assert entries[0]["package_id"] == record.package_id

    # The store directory holds a registry database and a storage directory.
    store_dir = root / ARCHIVE_STORE_DIR
    registry_file = store_dir / "registry.sqlite"
    storage_dir = store_dir / "storage"
    assert registry_file.is_file()
    assert storage_dir.is_dir()
|
|
|
|
|
|
def test_list_archives_returns_recorded_entries(tmp_path: Path) -> None:
    """``list_archives`` is empty at first, then returns records in creation order."""
    root = _seed_infospace(tmp_path)

    assert list_archives(root) == []

    notes = ["alpha", "beta"]
    created = [archive_infospace(root, note=note) for note in notes]

    listed = list_archives(root)
    assert [entry.package_id for entry in listed] == [
        record.package_id for record in created
    ]
    assert [entry.note for entry in listed] == notes
|
|
|
|
|
|
def test_archive_excludes_store_dir_to_avoid_recursive_capture(tmp_path: Path) -> None:
    """A second archive must not capture the store written by the first one."""
    root = _seed_infospace(tmp_path)

    before = archive_infospace(root)
    after = archive_infospace(root)

    # The first call populates the store dir. If the second call swept up any
    # of those bytes, its file_count would be larger than the first one's.
    assert after.file_count == before.file_count

    latest = list_archives(root)[1]
    leaked = [
        path
        for path in latest.included_paths
        if path.startswith(ARCHIVE_STORE_DIR)
    ]
    assert not leaked
|
|
|
|
|
|
def test_archive_respects_caller_supplied_include_set(tmp_path: Path) -> None:
    """An explicit ``include`` list limits the archive to exactly those paths."""
    root = _seed_infospace(tmp_path)
    manifest_name = "infospace.yaml"

    record = archive_infospace(root, include=[manifest_name])

    assert record.included_paths == [manifest_name]
    assert record.file_count == 1
|
|
|
|
|
|
def test_archive_rejects_empty_include(tmp_path: Path) -> None:
    """An include set naming only a missing path fails with ``empty_archive``."""
    root = _seed_infospace(tmp_path)
    missing_only = ["does-not-exist"]

    with pytest.raises(InfospaceError) as excinfo:
        archive_infospace(root, include=missing_only)

    raised = excinfo.value
    assert raised.code == "empty_archive"
|
|
|
|
|
|
def test_restore_archive_round_trips_bytes(tmp_path: Path) -> None:
    """Restoring an archive reproduces every archived file byte-for-byte.

    The restore result must report the same manifest digest and file count as
    the archive record, and each restored path must exist under the target
    with contents identical to the live infospace.
    """
    root = _seed_infospace(tmp_path)
    record = archive_infospace(root, note="round trip")

    target = tmp_path / "restored"
    result = restore_archive(
        record.package_id,
        target=target,
        source_infospace=root,
    )

    assert isinstance(result, RestoredArchive)
    assert result.manifest_digest == record.manifest_digest
    assert result.file_count == record.file_count

    # Materialize first so the emptiness check and the loop see the same data.
    # Without this guard an empty restored_paths would skip every byte
    # comparison and the test would pass vacuously.
    restored_paths = list(result.restored_paths)
    assert restored_paths, "restore reported no restored paths"

    for rel in restored_paths:
        original = root / rel
        restored = target / rel
        assert restored.is_file()
        # shallow=False forces a content comparison rather than a stat check.
        assert filecmp.cmp(original, restored, shallow=False), rel
|
|
|
|
|
|
def test_restore_archive_refuses_non_empty_target(tmp_path: Path) -> None:
    """Restoring into a populated directory fails with ``restore_target_not_empty``."""
    root = _seed_infospace(tmp_path)
    record = archive_infospace(root)

    # Pre-populate the target so it is not empty when the restore runs.
    occupied = tmp_path / "filled"
    occupied.mkdir()
    occupied.joinpath("existing.txt").write_text("hi", encoding="utf-8")

    with pytest.raises(InfospaceError) as excinfo:
        restore_archive(record.package_id, target=occupied, source_infospace=root)

    raised = excinfo.value
    assert raised.code == "restore_target_not_empty"
|
|
|
|
|
|
def test_restore_archive_force_overwrites_non_empty_target(tmp_path: Path) -> None:
    """With ``force=True`` a populated target is accepted and extras survive."""
    root = _seed_infospace(tmp_path)
    record = archive_infospace(root)

    destination = tmp_path / "filled-force"
    destination.mkdir()
    stale_file = destination / "leftover.txt"
    stale_file.write_text("old", encoding="utf-8")

    outcome = restore_archive(
        record.package_id, target=destination, source_infospace=root, force=True
    )

    assert outcome.file_count == record.file_count
    # Files already in the target that the manifest does not mention stay put.
    assert stale_file.read_text(encoding="utf-8") == "old"
|
|
|
|
|
|
def test_restore_archive_requires_store_location(tmp_path: Path) -> None:
    """Restoring without ``source_infospace`` fails with ``missing_archive_store``."""
    placeholder_id = "00000000-0000-0000-0000-000000000000"

    with pytest.raises(InfospaceError) as excinfo:
        restore_archive(placeholder_id, target=tmp_path)

    raised = excinfo.value
    assert raised.code == "missing_archive_store"
|
|
|
|
|
|
def test_annotate_retention_returns_state_for_each_archive(tmp_path: Path) -> None:
    """Each annotated archive carries retention state with the default class."""
    root = _seed_infospace(tmp_path)
    first = archive_infospace(root)
    second = archive_infospace(root)

    annotated = annotate_retention(list_archives(root), source_infospace=root)

    # Annotation keeps the archives in the order they were created.
    expected_ids = [first.package_id, second.package_id]
    annotated_ids = [entry["archive"]["package_id"] for entry in annotated]
    assert annotated_ids == expected_ids

    for entry in annotated:
        state = entry["retention"]
        assert state is not None
        assert state["effective_class"] == DEFAULT_RETENTION_CLASS
        assert state["eligible_for_deletion"] is False
|
|
|
|
|
|
def test_archive_default_include_captures_contracts_and_schemas(
    tmp_path: Path,
) -> None:
    """The default include set archives files under ``contracts/`` and ``schemas/``.

    One file is written into each directory, the infospace is archived with
    no explicit ``include``, and the package is restored. Both files must
    appear among the restored paths.
    """
    root = _seed_infospace(tmp_path)
    (root / "contracts").mkdir()
    (root / "contracts" / "entity.contract.md").write_text(
        "# contract\n", encoding="utf-8"
    )
    (root / "schemas").mkdir()
    (root / "schemas" / "entity.schema.json").write_text("{}", encoding="utf-8")

    record = archive_infospace(root)
    restored = _restored_paths_via_round_trip(record, root, tmp_path)

    # Check both directories named in the test title. Previously only the
    # contracts file was asserted, so a dropped schemas/ went unnoticed.
    for expected in (
        "contracts/entity.contract.md",
        "schemas/entity.schema.json",
    ):
        assert expected in restored, expected
|
|
|
|
|
|
def test_archive_surfaces_skipped_top_level_dirs(tmp_path: Path) -> None:
    """A non-empty top-level dir outside the include set is reported as skipped."""
    root = _seed_infospace(tmp_path)

    # A populated directory that the default include set does not cover.
    scratch_dir = root / "experimental"
    scratch_dir.mkdir()
    scratch_dir.joinpath("scratch.md").write_text("scratch", encoding="utf-8")

    # An empty directory, which is expected not to be flagged.
    root.joinpath("empty-dir").mkdir()

    record = archive_infospace(root)

    assert record.skipped_top_level == ["experimental"]
|
|
|
|
|
|
def test_annotate_retention_returns_none_when_store_missing(tmp_path: Path) -> None:
    """Retention is ``None`` when the archive was written to an external store."""
    root = _seed_infospace(tmp_path)
    external_store = tmp_path / "external-store"
    archive_infospace(root, store_root=external_store)

    # store_root was overridden, so the infospace's own store does not exist
    # when annotate_retention looks for it via source_infospace.
    annotated = annotate_retention(list_archives(root), source_infospace=root)

    first_entry = annotated[0]
    assert first_entry["retention"] is None
|