generated from coulomb/repo-seed
archive: include contracts/, schemas/; report skipped top-level dirs
Two of yesterday's archives silently dropped infospace content: the default include set was missing contracts/, so wealth-vsm-generation-pilot (16 files) and wealth-vsm-legacy-slice (12 files) were preserved as 14 and 10 files respectively. Fix the include set and make silent drops visible. - DEFAULT_INCLUDE now: infospace.yaml, artifacts, contracts, schemas, workflows, output, reports, exports - ArchiveRecord gains skipped_top_level: top-level entries present in the live root that are not in the include set, not excluded, and not auto-hidden (hidden dotfiles, empty dirs, .store/index.yaml). Surfaces in index.yaml only when non-empty. - Re-archived the two affected pilots with correct counts. Prior records remain in each index.yaml as history. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -15,3 +15,22 @@ archives:
|
|||||||
producer: infospace-bench
|
producer: infospace-bench
|
||||||
subject: wealth-vsm-generation-pilot
|
subject: wealth-vsm-generation-pilot
|
||||||
store_root: infospaces/wealth-vsm-generation-pilot/output/archives/.store
|
store_root: infospaces/wealth-vsm-generation-pilot/output/archives/.store
|
||||||
|
- package_id: a4c7809f-51f3-4fd7-8f15-1be95e8b6171
|
||||||
|
manifest_digest: blake3:f5932ba65730a41c0a844f86749854200bf6443f2246afd88a4a0dfbefabde6a
|
||||||
|
retention_class: release-evidence
|
||||||
|
created_at: '2026-05-17T10:20:51'
|
||||||
|
included_paths:
|
||||||
|
- infospace.yaml
|
||||||
|
- artifacts
|
||||||
|
- contracts
|
||||||
|
- schemas
|
||||||
|
- workflows
|
||||||
|
- output
|
||||||
|
- reports
|
||||||
|
- exports
|
||||||
|
file_count: 16
|
||||||
|
note: 'Re-archive: capture contracts/ that initial archive missed (DEFAULT_INCLUDE
|
||||||
|
fix)'
|
||||||
|
producer: infospace-bench
|
||||||
|
subject: wealth-vsm-generation-pilot
|
||||||
|
store_root: infospaces/wealth-vsm-generation-pilot/output/archives/.store
|
||||||
|
|||||||
@@ -15,3 +15,22 @@ archives:
|
|||||||
producer: infospace-bench
|
producer: infospace-bench
|
||||||
subject: wealth-vsm-legacy-slice
|
subject: wealth-vsm-legacy-slice
|
||||||
store_root: infospaces/wealth-vsm-legacy-slice/output/archives/.store
|
store_root: infospaces/wealth-vsm-legacy-slice/output/archives/.store
|
||||||
|
- package_id: ba107ffc-03b8-4c39-a72f-9aec66cf1b45
|
||||||
|
manifest_digest: blake3:30b06d0b6fe7d9fed1a094805c07ce7896fff950aece8ec33e4df99da162accb
|
||||||
|
retention_class: release-evidence
|
||||||
|
created_at: '2026-05-17T10:20:54'
|
||||||
|
included_paths:
|
||||||
|
- infospace.yaml
|
||||||
|
- artifacts
|
||||||
|
- contracts
|
||||||
|
- schemas
|
||||||
|
- workflows
|
||||||
|
- output
|
||||||
|
- reports
|
||||||
|
- exports
|
||||||
|
file_count: 12
|
||||||
|
note: 'Re-archive: capture contracts/ that initial archive missed (DEFAULT_INCLUDE
|
||||||
|
fix)'
|
||||||
|
producer: infospace-bench
|
||||||
|
subject: wealth-vsm-legacy-slice
|
||||||
|
store_root: infospaces/wealth-vsm-legacy-slice/output/archives/.store
|
||||||
|
|||||||
@@ -46,11 +46,29 @@ ARCHIVE_BACKEND_DIR = "storage"
|
|||||||
# Top-level entries captured when the caller does not pass an explicit include
# set. Omitting `contracts` from this tuple previously caused archives to
# silently drop infospace content, so keep it (and `schemas`) listed here.
DEFAULT_INCLUDE: tuple[str, ...] = (
    "infospace.yaml",
    "artifacts",
    "contracts",
    "schemas",
    "workflows",
    "output",
    "reports",
    "exports",
)

# Top-level entries the default include set already considers (file or dir),
# plus things we never want to capture. Anything in the live root that is not
# in this set and not in `exclude` shows up under `skipped_top_level` so silent
# data loss is visible in the archive record.
#
# Derived from DEFAULT_INCLUDE instead of being re-listed by hand so the two
# collections cannot drift apart; union any extra never-capture names here if
# such names are introduced.
_KNOWN_TOP_LEVEL_NAMES: frozenset[str] = frozenset(DEFAULT_INCLUDE)
|
||||||
DEFAULT_RETENTION_CLASS = "release-evidence"
|
DEFAULT_RETENTION_CLASS = "release-evidence"
|
||||||
PRODUCER = "infospace-bench"
|
PRODUCER = "infospace-bench"
|
||||||
DEFAULT_ACTOR = "infospace-bench"
|
DEFAULT_ACTOR = "infospace-bench"
|
||||||
@@ -71,6 +89,7 @@ class ArchiveRecord:
|
|||||||
subject: str = ""
|
subject: str = ""
|
||||||
store_root: str | None = None
|
store_root: str | None = None
|
||||||
metadata: dict[str, Any] = field(default_factory=dict)
|
metadata: dict[str, Any] = field(default_factory=dict)
|
||||||
|
skipped_top_level: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
def to_dict(self) -> dict[str, Any]:
|
def to_dict(self) -> dict[str, Any]:
|
||||||
out: dict[str, Any] = {
|
out: dict[str, Any] = {
|
||||||
@@ -88,6 +107,8 @@ class ArchiveRecord:
|
|||||||
out["store_root"] = self.store_root
|
out["store_root"] = self.store_root
|
||||||
if self.metadata:
|
if self.metadata:
|
||||||
out["metadata"] = dict(self.metadata)
|
out["metadata"] = dict(self.metadata)
|
||||||
|
if self.skipped_top_level:
|
||||||
|
out["skipped_top_level"] = list(self.skipped_top_level)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -106,6 +127,7 @@ class ArchiveRecord:
|
|||||||
str(data["store_root"]) if data.get("store_root") is not None else None
|
str(data["store_root"]) if data.get("store_root") is not None else None
|
||||||
),
|
),
|
||||||
metadata=dict(data.get("metadata", {})),
|
metadata=dict(data.get("metadata", {})),
|
||||||
|
skipped_top_level=list(data.get("skipped_top_level", [])),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -260,6 +282,9 @@ async def _archive_infospace_async(
|
|||||||
"No files matched the include set for archiving",
|
"No files matched the include set for archiving",
|
||||||
{"root": str(root), "include": list(include)},
|
{"root": str(root), "include": list(include)},
|
||||||
)
|
)
|
||||||
|
skipped_top_level = _find_skipped_top_level(
|
||||||
|
root, include=include, exclude=effective_exclude
|
||||||
|
)
|
||||||
|
|
||||||
owned_registry = registry is None
|
owned_registry = registry is None
|
||||||
effective_store_root: Path | None = None
|
effective_store_root: Path | None = None
|
||||||
@@ -312,6 +337,7 @@ async def _archive_infospace_async(
|
|||||||
producer=PRODUCER,
|
producer=PRODUCER,
|
||||||
subject=subject,
|
subject=subject,
|
||||||
store_root=str(effective_store_root) if effective_store_root else None,
|
store_root=str(effective_store_root) if effective_store_root else None,
|
||||||
|
skipped_top_level=skipped_top_level,
|
||||||
)
|
)
|
||||||
_append_index(root, record)
|
_append_index(root, record)
|
||||||
return record
|
return record
|
||||||
@@ -341,6 +367,40 @@ def _collect_files(
|
|||||||
return sorted(seen.items())
|
return sorted(seen.items())
|
||||||
|
|
||||||
|
|
||||||
|
def _find_skipped_top_level(
    root: Path,
    *,
    include: tuple[str, ...],
    exclude: tuple[str, ...],
) -> list[str]:
    """Return the top-level entries of ``root`` that an archive silently drops.

    A top-level entry is "skipped" when it is neither in the include set, nor
    in the known structural set (``_KNOWN_TOP_LEVEL_NAMES``), nor matched by an
    exclude pattern. Entries named ``.store`` or ``index.yaml``, and any entry
    whose name starts with ``.``, are never reported.

    Files are reported regardless of size. Directories are reported only when
    they contain at least one entry; a directory that cannot be listed
    (``PermissionError``) is treated like an empty one and is not reported.

    Args:
        root: Live infospace root whose direct children are inspected.
        include: Include set used for the archive. A trailing ``/`` on an
            entry is ignored, so ``"contracts/"`` and ``"contracts"`` are
            equivalent here.
        exclude: Exclude patterns, evaluated through ``_is_excluded``.

    Returns:
        Names of the skipped entries, in the sorted order of ``root``'s
        children.
    """
    auto_excluded = {".store", "index.yaml"}
    # Normalise directory-style spellings once, up front: `_is_excluded` strips
    # trailing slashes from its patterns, so the include side must do the same
    # or an included "contracts/" would be falsely reported as skipped.
    included = {item.rstrip("/") for item in include}

    skipped: list[str] = []
    for entry in sorted(root.iterdir()):
        name = entry.name
        if name in _KNOWN_TOP_LEVEL_NAMES or name in included:
            continue
        if _is_excluded(name, exclude) or name in auto_excluded:
            continue
        # Hide hidden files (.git, .DS_Store, ...) by default.
        if name.startswith("."):
            continue
        if entry.is_dir():
            try:
                # Path objects are always truthy, so this stops at the first
                # child without listing the whole directory.
                has_children = any(entry.iterdir())
            except PermissionError:
                continue
            if not has_children:
                continue
        skipped.append(name)
    return skipped
|
||||||
|
|
||||||
|
|
||||||
def _is_excluded(rel_path: str, exclude: tuple[str, ...]) -> bool:
|
def _is_excluded(rel_path: str, exclude: tuple[str, ...]) -> bool:
|
||||||
for pattern in exclude:
|
for pattern in exclude:
|
||||||
cleaned = pattern.rstrip("/")
|
cleaned = pattern.rstrip("/")
|
||||||
|
|||||||
@@ -25,6 +25,16 @@ from infospace_bench.archive import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _restored_paths_via_round_trip(
    record: ArchiveRecord, source: Path, tmp_path: Path
) -> list[str]:
    """Restore ``record`` into a scratch directory and list what came back.

    The restore target is ``tmp_path / "restore-<first 8 chars of package_id>"``
    and ``source`` is passed to ``restore_archive`` as ``source_infospace``.
    Returns the ``restored_paths`` of the restore result as a new list.
    """
    package_id = record.package_id
    restored = restore_archive(
        package_id,
        target=tmp_path / f"restore-{package_id[:8]}",
        source_infospace=source,
    )
    return [*restored.restored_paths]
|
||||||
|
|
||||||
|
|
||||||
def _seed_infospace(workspace: Path, slug: str = "demo") -> Path:
|
def _seed_infospace(workspace: Path, slug: str = "demo") -> Path:
|
||||||
create_infospace(workspace, slug, name="Demo", topic_domain="Test")
|
create_infospace(workspace, slug, name="Demo", topic_domain="Test")
|
||||||
root = workspace / "infospaces" / slug
|
root = workspace / "infospaces" / slug
|
||||||
@@ -188,6 +198,33 @@ def test_annotate_retention_returns_state_for_each_archive(tmp_path: Path) -> No
|
|||||||
assert retention["eligible_for_deletion"] is False
|
assert retention["eligible_for_deletion"] is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_archive_default_include_captures_contracts_and_schemas(
    tmp_path: Path,
) -> None:
    """The default include set must archive both ``contracts/`` and ``schemas/``.

    Seeds one file in each directory, archives with default settings, restores
    the package, and checks that both files are among the restored paths.
    """
    root = _seed_infospace(tmp_path)
    (root / "contracts").mkdir()
    (root / "contracts" / "entity.contract.md").write_text(
        "# contract\n", encoding="utf-8"
    )
    (root / "schemas").mkdir()
    (root / "schemas" / "entity.schema.json").write_text("{}", encoding="utf-8")

    record = archive_infospace(root)
    restored = _restored_paths_via_round_trip(record, root, tmp_path)

    assert "contracts/entity.contract.md" in restored
    # The test name promises schemas coverage too; without this assertion a
    # regression that drops schemas/ from the include set would go unnoticed.
    assert "schemas/entity.schema.json" in restored
|
||||||
|
|
||||||
|
|
||||||
|
def test_archive_surfaces_skipped_top_level_dirs(tmp_path: Path) -> None:
    """A populated, non-included top-level dir is reported; an empty one is not."""
    infospace_root = _seed_infospace(tmp_path)

    populated = infospace_root / "experimental"
    populated.mkdir()
    (populated / "scratch.md").write_text("scratch", encoding="utf-8")

    # Left empty on purpose: empty directories must not be flagged.
    (infospace_root / "empty-dir").mkdir()

    archived = archive_infospace(infospace_root)
    assert archived.skipped_top_level == ["experimental"]
|
||||||
|
|
||||||
|
|
||||||
def test_annotate_retention_returns_none_when_store_missing(tmp_path: Path) -> None:
|
def test_annotate_retention_returns_none_when_store_missing(tmp_path: Path) -> None:
|
||||||
root = _seed_infospace(tmp_path)
|
root = _seed_infospace(tmp_path)
|
||||||
archive_infospace(root, store_root=tmp_path / "external-store")
|
archive_infospace(root, store_root=tmp_path / "external-store")
|
||||||
|
|||||||
Reference in New Issue
Block a user