from __future__ import annotations

import filecmp
from pathlib import Path

import pytest
import yaml

from infospace_bench import (
    ArchiveRecord,
    InfospaceError,
    RestoredArchive,
    add_artifact,
    annotate_retention,
    archive_infospace,
    create_infospace,
    list_archives,
    restore_archive,
)
from infospace_bench.archive import (
    ARCHIVE_INDEX_PATH,
    ARCHIVE_STORE_DIR,
    DEFAULT_RETENTION_CLASS,
    PRODUCER,
)


def _restored_paths_via_round_trip(
    record: ArchiveRecord, source: Path, tmp_path: Path
) -> list[str]:
    """Restore *record* into a fresh directory and return the restored paths.

    The target directory lives under *tmp_path* and is named after the first
    eight characters of the package id, so different archives restored within
    the same test do not share a target.
    """
    target = tmp_path / f"restore-{record.package_id[:8]}"
    result = restore_archive(
        record.package_id,
        target=target,
        source_infospace=source,
    )
    return list(result.restored_paths)


def _seed_infospace(workspace: Path, slug: str = "demo") -> Path:
    """Create an infospace under *workspace* and return its root directory.

    The infospace is seeded with one registered source artifact
    (``source.md``) and a ``reports/summary.md`` file.
    """
    create_infospace(workspace, slug, name="Demo", topic_domain="Test")
    root = workspace / "infospaces" / slug
    source = workspace / "source.md"
    source.write_text("# source\n", encoding="utf-8")
    add_artifact(root, source, kind="source", title="Source One")
    # NOTE(review): relies on ``reports/`` already existing after
    # ``create_infospace``; ``write_text`` does not create parent directories.
    (root / "reports" / "summary.md").write_text("# summary\n", encoding="utf-8")
    return root


def test_archive_infospace_writes_index_and_finalizes_package(tmp_path: Path) -> None:
    """Archiving returns a populated record and writes the index and store."""
    root = _seed_infospace(tmp_path)

    record = archive_infospace(root, note="first archive")

    assert isinstance(record, ArchiveRecord)
    assert record.package_id
    assert record.manifest_digest.startswith("blake3:")
    assert record.retention_class == DEFAULT_RETENTION_CLASS
    assert record.producer == PRODUCER
    assert record.subject == "demo"
    assert record.note == "first archive"
    assert record.file_count >= 4  # infospace.yaml, index.yaml, source.md, summary.md

    index_path = root / ARCHIVE_INDEX_PATH
    assert index_path.is_file()
    data = yaml.safe_load(index_path.read_text(encoding="utf-8"))
    assert isinstance(data, dict)
    assert len(data["archives"]) == 1
    assert data["archives"][0]["package_id"] == record.package_id

    store_root = root / ARCHIVE_STORE_DIR
    assert (store_root / "registry.sqlite").is_file()
    assert (store_root / "storage").is_dir()


def test_list_archives_returns_recorded_entries(tmp_path: Path) -> None:
    """Listing is empty before any archive and ordered by creation after."""
    root = _seed_infospace(tmp_path)
    assert list_archives(root) == []

    first = archive_infospace(root, note="alpha")
    second = archive_infospace(root, note="beta")

    archives = list_archives(root)
    assert [a.package_id for a in archives] == [first.package_id, second.package_id]
    assert [a.note for a in archives] == ["alpha", "beta"]


def test_archive_excludes_store_dir_to_avoid_recursive_capture(tmp_path: Path) -> None:
    """A second archive must not capture the store written by the first."""
    root = _seed_infospace(tmp_path)

    first = archive_infospace(root)
    second = archive_infospace(root)

    # The store dir grows on the first call; the second call must not pick up
    # any of its bytes (otherwise file_count would balloon).
    assert second.file_count == first.file_count
    second_record = list_archives(root)[1]
    assert all(
        not path.startswith(ARCHIVE_STORE_DIR)
        for path in second_record.included_paths
    )


def test_archive_respects_caller_supplied_include_set(tmp_path: Path) -> None:
    """An explicit ``include`` list limits the archive to exactly those paths."""
    root = _seed_infospace(tmp_path)

    record = archive_infospace(root, include=["infospace.yaml"])

    assert record.included_paths == ["infospace.yaml"]
    assert record.file_count == 1


def test_archive_rejects_empty_include(tmp_path: Path) -> None:
    """An include set that matches nothing raises ``empty_archive``."""
    root = _seed_infospace(tmp_path)

    with pytest.raises(InfospaceError) as excinfo:
        archive_infospace(root, include=["does-not-exist"])

    assert excinfo.value.code == "empty_archive"


def test_restore_archive_round_trips_bytes(tmp_path: Path) -> None:
    """Every restored file is byte-identical to its original."""
    root = _seed_infospace(tmp_path)
    record = archive_infospace(root, note="round trip")

    target = tmp_path / "restored"
    result = restore_archive(
        record.package_id,
        target=target,
        source_infospace=root,
    )

    assert isinstance(result, RestoredArchive)
    assert result.manifest_digest == record.manifest_digest
    assert result.file_count == record.file_count

    for rel in result.restored_paths:
        original = root / rel
        restored = target / rel
        assert restored.is_file()
        # shallow=False forces a content comparison rather than a stat check.
        assert filecmp.cmp(original, restored, shallow=False), rel


def test_restore_archive_refuses_non_empty_target(tmp_path: Path) -> None:
    """Restoring into a populated directory without ``force`` is rejected."""
    root = _seed_infospace(tmp_path)
    record = archive_infospace(root)

    target = tmp_path / "filled"
    target.mkdir()
    (target / "existing.txt").write_text("hi", encoding="utf-8")

    with pytest.raises(InfospaceError) as excinfo:
        restore_archive(
            record.package_id,
            target=target,
            source_infospace=root,
        )

    assert excinfo.value.code == "restore_target_not_empty"


def test_restore_archive_force_overwrites_non_empty_target(tmp_path: Path) -> None:
    """``force=True`` restores into a populated directory, keeping strays."""
    root = _seed_infospace(tmp_path)
    record = archive_infospace(root)

    target = tmp_path / "filled-force"
    target.mkdir()
    (target / "leftover.txt").write_text("old", encoding="utf-8")

    result = restore_archive(
        record.package_id,
        target=target,
        source_infospace=root,
        force=True,
    )

    assert result.file_count == record.file_count
    # Pre-existing files that are not in the manifest are left in place.
    assert (target / "leftover.txt").read_text(encoding="utf-8") == "old"


def test_restore_archive_requires_store_location(tmp_path: Path) -> None:
    """Restoring without a source infospace raises ``missing_archive_store``."""
    with pytest.raises(InfospaceError) as excinfo:
        restore_archive("00000000-0000-0000-0000-000000000000", target=tmp_path)

    assert excinfo.value.code == "missing_archive_store"


def test_annotate_retention_returns_state_for_each_archive(tmp_path: Path) -> None:
    """Each listed archive gets a retention entry, in listing order."""
    root = _seed_infospace(tmp_path)
    first = archive_infospace(root)
    second = archive_infospace(root)

    archives = list_archives(root)
    annotated = annotate_retention(archives, source_infospace=root)

    assert [item["archive"]["package_id"] for item in annotated] == [
        first.package_id,
        second.package_id,
    ]
    for item in annotated:
        retention = item["retention"]
        assert retention is not None
        assert retention["effective_class"] == DEFAULT_RETENTION_CLASS
        assert retention["eligible_for_deletion"] is False


def test_archive_default_include_captures_contracts_and_schemas(
    tmp_path: Path,
) -> None:
    """The default include set picks up both ``contracts/`` and ``schemas/``."""
    root = _seed_infospace(tmp_path)
    (root / "contracts").mkdir()
    (root / "contracts" / "entity.contract.md").write_text(
        "# contract\n", encoding="utf-8"
    )
    (root / "schemas").mkdir()
    (root / "schemas" / "entity.schema.json").write_text("{}", encoding="utf-8")

    record = archive_infospace(root)

    restored = _restored_paths_via_round_trip(record, root, tmp_path)
    assert "contracts/entity.contract.md" in restored
    # The schema file is created above and named in the test; assert it too so
    # a regression that drops ``schemas/`` from the defaults is caught.
    assert "schemas/entity.schema.json" in restored


def test_archive_surfaces_skipped_top_level_dirs(tmp_path: Path) -> None:
    """Non-empty top-level dirs that were not archived are reported."""
    root = _seed_infospace(tmp_path)
    (root / "experimental").mkdir()
    (root / "experimental" / "scratch.md").write_text("scratch", encoding="utf-8")
    (root / "empty-dir").mkdir()  # empty: not flagged

    record = archive_infospace(root)

    assert record.skipped_top_level == ["experimental"]


def test_annotate_retention_returns_none_when_store_missing(tmp_path: Path) -> None:
    """Retention is ``None`` when the infospace's own store does not exist."""
    root = _seed_infospace(tmp_path)
    archive_infospace(root, store_root=tmp_path / "external-store")

    archives = list_archives(root)
    # Source infospace's store doesn't exist (we overrode store_root)
    annotated = annotate_retention(archives, source_infospace=root)

    assert annotated[0]["retention"] is None