generated from coulomb/repo-seed
archive: include contracts/, schemas/; report skipped top-level dirs
Two of yesterday's archives silently dropped infospace content: the default include set was missing contracts/, so wealth-vsm-generation-pilot (16 files) and wealth-vsm-legacy-slice (12 files) were preserved as 14 and 10 files respectively. Fix the include set and make silent drops visible.
- DEFAULT_INCLUDE now: infospace.yaml, artifacts, contracts, schemas, workflows, output, reports, exports
- ArchiveRecord gains skipped_top_level: top-level entries present in the live root that are not in the include set, not excluded, and not auto-hidden (hidden dotfiles, empty dirs, .store/index.yaml). Surfaces in index.yaml only when non-empty.
- Re-archived the two affected pilots with correct counts. Prior records remain in each index.yaml as history.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -15,3 +15,22 @@ archives:
|
||||
producer: infospace-bench
|
||||
subject: wealth-vsm-generation-pilot
|
||||
store_root: infospaces/wealth-vsm-generation-pilot/output/archives/.store
|
||||
- package_id: a4c7809f-51f3-4fd7-8f15-1be95e8b6171
|
||||
manifest_digest: blake3:f5932ba65730a41c0a844f86749854200bf6443f2246afd88a4a0dfbefabde6a
|
||||
retention_class: release-evidence
|
||||
created_at: '2026-05-17T10:20:51'
|
||||
included_paths:
|
||||
- infospace.yaml
|
||||
- artifacts
|
||||
- contracts
|
||||
- schemas
|
||||
- workflows
|
||||
- output
|
||||
- reports
|
||||
- exports
|
||||
file_count: 16
|
||||
note: 'Re-archive: capture contracts/ that initial archive missed (DEFAULT_INCLUDE
|
||||
fix)'
|
||||
producer: infospace-bench
|
||||
subject: wealth-vsm-generation-pilot
|
||||
store_root: infospaces/wealth-vsm-generation-pilot/output/archives/.store
|
||||
|
||||
@@ -15,3 +15,22 @@ archives:
|
||||
producer: infospace-bench
|
||||
subject: wealth-vsm-legacy-slice
|
||||
store_root: infospaces/wealth-vsm-legacy-slice/output/archives/.store
|
||||
- package_id: ba107ffc-03b8-4c39-a72f-9aec66cf1b45
|
||||
manifest_digest: blake3:30b06d0b6fe7d9fed1a094805c07ce7896fff950aece8ec33e4df99da162accb
|
||||
retention_class: release-evidence
|
||||
created_at: '2026-05-17T10:20:54'
|
||||
included_paths:
|
||||
- infospace.yaml
|
||||
- artifacts
|
||||
- contracts
|
||||
- schemas
|
||||
- workflows
|
||||
- output
|
||||
- reports
|
||||
- exports
|
||||
file_count: 12
|
||||
note: 'Re-archive: capture contracts/ that initial archive missed (DEFAULT_INCLUDE
|
||||
fix)'
|
||||
producer: infospace-bench
|
||||
subject: wealth-vsm-legacy-slice
|
||||
store_root: infospaces/wealth-vsm-legacy-slice/output/archives/.store
|
||||
|
||||
@@ -46,11 +46,29 @@ ARCHIVE_BACKEND_DIR = "storage"
|
||||
# Top-level names (files or directories) that an archive run captures when the
# caller does not supply its own include set.
DEFAULT_INCLUDE: tuple[str, ...] = (
    "infospace.yaml",
    "artifacts",
    "contracts",
    "schemas",
    "workflows",
    "output",
    "reports",
    "exports",
)
# Top-level names the default include set already considers (file or dir).
# Anything in the live root that is not in this set and not in `exclude` shows
# up under `skipped_top_level` so silent data loss is visible in the archive
# record. Derived from DEFAULT_INCLUDE rather than repeated by hand so the two
# collections cannot drift apart when a name is added to one but not the other.
_KNOWN_TOP_LEVEL_NAMES: frozenset[str] = frozenset(DEFAULT_INCLUDE)
DEFAULT_RETENTION_CLASS = "release-evidence"
PRODUCER = "infospace-bench"
DEFAULT_ACTOR = "infospace-bench"
|
||||
@@ -71,6 +89,7 @@ class ArchiveRecord:
|
||||
subject: str = ""
|
||||
store_root: str | None = None
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
skipped_top_level: list[str] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
out: dict[str, Any] = {
|
||||
@@ -88,6 +107,8 @@ class ArchiveRecord:
|
||||
out["store_root"] = self.store_root
|
||||
if self.metadata:
|
||||
out["metadata"] = dict(self.metadata)
|
||||
if self.skipped_top_level:
|
||||
out["skipped_top_level"] = list(self.skipped_top_level)
|
||||
return out
|
||||
|
||||
@classmethod
|
||||
@@ -106,6 +127,7 @@ class ArchiveRecord:
|
||||
str(data["store_root"]) if data.get("store_root") is not None else None
|
||||
),
|
||||
metadata=dict(data.get("metadata", {})),
|
||||
skipped_top_level=list(data.get("skipped_top_level", [])),
|
||||
)
|
||||
|
||||
|
||||
@@ -260,6 +282,9 @@ async def _archive_infospace_async(
|
||||
"No files matched the include set for archiving",
|
||||
{"root": str(root), "include": list(include)},
|
||||
)
|
||||
skipped_top_level = _find_skipped_top_level(
|
||||
root, include=include, exclude=effective_exclude
|
||||
)
|
||||
|
||||
owned_registry = registry is None
|
||||
effective_store_root: Path | None = None
|
||||
@@ -312,6 +337,7 @@ async def _archive_infospace_async(
|
||||
producer=PRODUCER,
|
||||
subject=subject,
|
||||
store_root=str(effective_store_root) if effective_store_root else None,
|
||||
skipped_top_level=skipped_top_level,
|
||||
)
|
||||
_append_index(root, record)
|
||||
return record
|
||||
@@ -341,6 +367,40 @@ def _collect_files(
|
||||
return sorted(seen.items())
|
||||
|
||||
|
||||
def _find_skipped_top_level(
    root: Path,
    *,
    include: tuple[str, ...],
    exclude: tuple[str, ...],
) -> list[str]:
    """Return the names of top-level entries under *root* that are dropped.

    An entry directly inside *root* is reported when every one of these holds:

    * its name is neither in ``_KNOWN_TOP_LEVEL_NAMES`` nor in *include*
      (include entries are compared with any trailing ``/`` removed, the same
      normalisation ``_is_excluded`` applies to exclude patterns);
    * ``_is_excluded`` does not match its name against *exclude*;
    * its name is not ``.store`` or ``index.yaml`` and does not start with
      a dot;
    * it is not a directory, or it is a directory with at least one child.

    A directory whose contents cannot be listed (``PermissionError``) is
    treated like an empty directory and is not reported.

    Args:
        root: Live infospace root whose immediate children are inspected.
        include: Names the archive run captures.
        exclude: Patterns forwarded to ``_is_excluded``.

    Returns:
        Entry names, in the order obtained by sorting the child paths of
        *root*.
    """
    # NOTE(review): these names are matched against bare top-level names; the
    # archive index/store appear to live under output/archives, so confirm
    # that hiding a top-level ``index.yaml`` is intended.
    auto_excluded = {".store", "index.yaml"}
    # "data/" and "data" must both match the directory entry named "data".
    included_names = {item.rstrip("/") for item in include}
    skipped: list[str] = []
    for entry in sorted(root.iterdir()):
        name = entry.name
        if name in _KNOWN_TOP_LEVEL_NAMES or name in included_names:
            continue
        if _is_excluded(name, exclude) or name in auto_excluded:
            continue
        # Hide hidden files (.git, .DS_Store, ...) by default.
        if name.startswith("."):
            continue
        if entry.is_dir():
            try:
                has_children = any(entry.iterdir())
            except PermissionError:
                # Best effort: an unreadable directory is handled as empty.
                has_children = False
            if not has_children:
                continue
        skipped.append(name)
    return skipped
|
||||
|
||||
|
||||
def _is_excluded(rel_path: str, exclude: tuple[str, ...]) -> bool:
|
||||
for pattern in exclude:
|
||||
cleaned = pattern.rstrip("/")
|
||||
|
||||
@@ -25,6 +25,16 @@ from infospace_bench.archive import (
|
||||
)
|
||||
|
||||
|
||||
def _restored_paths_via_round_trip(
    record: ArchiveRecord, source: Path, tmp_path: Path
) -> list[str]:
    """Restore *record* into a fresh directory and list what came back.

    The restore target is ``tmp_path / "restore-<first 8 chars of package_id>"``
    and *source* is passed to ``restore_archive`` as ``source_infospace``.
    Returns ``restored_paths`` from the restore result as a new list.
    """
    short_id = record.package_id[:8]
    restore_dir = tmp_path / f"restore-{short_id}"
    outcome = restore_archive(
        record.package_id,
        target=restore_dir,
        source_infospace=source,
    )
    return [*outcome.restored_paths]
|
||||
|
||||
|
||||
def _seed_infospace(workspace: Path, slug: str = "demo") -> Path:
|
||||
create_infospace(workspace, slug, name="Demo", topic_domain="Test")
|
||||
root = workspace / "infospaces" / slug
|
||||
@@ -188,6 +198,33 @@ def test_annotate_retention_returns_state_for_each_archive(tmp_path: Path) -> No
|
||||
assert retention["eligible_for_deletion"] is False
|
||||
|
||||
|
||||
def test_archive_default_include_captures_contracts_and_schemas(
    tmp_path: Path,
) -> None:
    """Files under contracts/ and schemas/ survive an archive round trip.

    Archives a seeded infospace with the default include set, restores it, and
    checks that one file from each directory is among the restored paths.
    """
    root = _seed_infospace(tmp_path)
    (root / "contracts").mkdir()
    (root / "contracts" / "entity.contract.md").write_text(
        "# contract\n", encoding="utf-8"
    )
    (root / "schemas").mkdir()
    (root / "schemas" / "entity.schema.json").write_text("{}", encoding="utf-8")

    record = archive_infospace(root)
    restored = _restored_paths_via_round_trip(record, root, tmp_path)
    assert "contracts/entity.contract.md" in restored
    # The test name promises schemas coverage too, so assert it explicitly.
    assert "schemas/entity.schema.json" in restored
|
||||
|
||||
|
||||
def test_archive_surfaces_skipped_top_level_dirs(tmp_path: Path) -> None:
    """A non-empty, unlisted top-level directory is reported as skipped.

    Creates one populated directory and one empty directory outside the
    include set and expects only the populated one in ``skipped_top_level``.
    """
    root = _seed_infospace(tmp_path)
    unlisted_dir = root / "experimental"
    unlisted_dir.mkdir()
    (unlisted_dir / "scratch.md").write_text("scratch", encoding="utf-8")
    # An empty directory has nothing to lose, so it must not be flagged.
    (root / "empty-dir").mkdir()

    record = archive_infospace(root)
    assert record.skipped_top_level == ["experimental"]
|
||||
|
||||
|
||||
def test_annotate_retention_returns_none_when_store_missing(tmp_path: Path) -> None:
|
||||
root = _seed_infospace(tmp_path)
|
||||
archive_infospace(root, store_root=tmp_path / "external-store")
|
||||
|
||||
Reference in New Issue
Block a user