archive: include contracts/, schemas/; report skipped top-level dirs

Two of yesterday's archives silently dropped infospace content: the default
include set was missing contracts/, so wealth-vsm-generation-pilot (16 files)
and wealth-vsm-legacy-slice (12 files) were preserved as 14 and 10 files
respectively. Fix the include set and make silent drops visible.

- DEFAULT_INCLUDE now: infospace.yaml, artifacts, contracts, schemas,
  workflows, output, reports, exports
- ArchiveRecord gains skipped_top_level: top-level entries present in the
  live root that are not in the include set, not excluded, and not auto-
  hidden (dotfiles, empty dirs, .store/index.yaml). The field appears in
  index.yaml only when non-empty.
- Re-archived the two affected pilots with correct counts. Prior records
  remain in each index.yaml as history.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-17 12:21:19 +02:00
parent 523db6d341
commit 37c28d2298
4 changed files with 135 additions and 0 deletions

View File

@@ -25,6 +25,16 @@ from infospace_bench.archive import (
)
def _restored_paths_via_round_trip(
    record: ArchiveRecord, source: Path, tmp_path: Path
) -> list[str]:
    """Restore the archive behind ``record`` and list the restored paths.

    The restore target is ``tmp_path / "restore-<prefix>"``, where
    ``<prefix>`` is the first eight characters of ``record.package_id``.
    ``source`` is handed to ``restore_archive`` as ``source_infospace``.

    Returns:
        A fresh list built from the restore result's ``restored_paths``.
    """
    restore_dir = tmp_path / "restore-{}".format(record.package_id[:8])
    outcome = restore_archive(
        record.package_id,
        target=restore_dir,
        source_infospace=source,
    )
    return [*outcome.restored_paths]
def _seed_infospace(workspace: Path, slug: str = "demo") -> Path:
create_infospace(workspace, slug, name="Demo", topic_domain="Test")
root = workspace / "infospaces" / slug
@@ -188,6 +198,33 @@ def test_annotate_retention_returns_state_for_each_archive(tmp_path: Path) -> No
assert retention["eligible_for_deletion"] is False
def test_archive_default_include_captures_contracts_and_schemas(
    tmp_path: Path,
) -> None:
    """Default include set must preserve both ``contracts/`` and ``schemas/``.

    Seeds an infospace with one file under each directory, archives it with
    the default include set, restores the archive into a scratch directory,
    and checks that both files come back.
    """
    root = _seed_infospace(tmp_path)
    (root / "contracts").mkdir()
    (root / "contracts" / "entity.contract.md").write_text(
        "# contract\n", encoding="utf-8"
    )
    (root / "schemas").mkdir()
    (root / "schemas" / "entity.schema.json").write_text("{}", encoding="utf-8")

    record = archive_infospace(root)

    restored = _restored_paths_via_round_trip(record, root, tmp_path)
    assert "contracts/entity.contract.md" in restored
    # The schema file was seeded above but previously never checked, so a
    # regression dropping schemas/ from the include set went unnoticed.
    assert "schemas/entity.schema.json" in restored
def test_archive_surfaces_skipped_top_level_dirs(tmp_path: Path) -> None:
    """A non-empty top-level dir outside the include set is reported.

    ``experimental/`` holds a file and must show up in the record's
    ``skipped_top_level``; ``empty-dir/`` has no content and must not.
    """
    root = _seed_infospace(tmp_path)
    unlisted_dir = root / "experimental"
    unlisted_dir.mkdir()
    (unlisted_dir / "scratch.md").write_text("scratch", encoding="utf-8")
    # Left empty on purpose: an empty directory is not flagged.
    (root / "empty-dir").mkdir()

    archived = archive_infospace(root)

    assert archived.skipped_top_level == ["experimental"]
def test_annotate_retention_returns_none_when_store_missing(tmp_path: Path) -> None:
root = _seed_infospace(tmp_path)
archive_infospace(root, store_root=tmp_path / "external-store")