generated from coulomb/repo-seed
Close repo reality scanner rollout
This commit is contained in:
@@ -258,6 +258,75 @@ After review, rerun with `--ingest` to store the snapshots. Add `--accept` only
|
||||
when candidates marked `review_state: accepted` should be projected into the
|
||||
registry graph.
|
||||
|
||||
## Scan Profiles And Review Workflow
|
||||
|
||||
The initial profile is `deterministic`, which means repo-local extraction plus
any explicitly enabled offline connectors. Additional profiles should be named
for the evidence policy they represent, for example `deterministic-llm-draft`
or `catalog-followup`. Keep profile names stable, because each repo's previous
snapshot is stored as `<slug>-<profile>.discovery.json`.
|
||||
|
||||
Recommended workflow:
|
||||
|
||||
1. Run `scan` or `registry scan-manifest` with `--dry-run`.
2. Reconcile with `--previous-snapshot` or `--previous-dir` when a prior
   snapshot exists.
3. Review candidates marked `review_state: needs_review` or
   `status: conflicted`, along with tombstones and review artifacts, before
   accepting anything.
4. Store reviewed output with `registry ingest-discovery`.
5. Use `registry accept-discovery` or `registry scan-manifest --ingest --accept`
   only for candidates whose review state is acceptable for projection.
|
||||
|
||||
## Failure Modes
|
||||
|
||||
Failures are captured close to the evidence source:
|
||||
|
||||
- Missing repo paths, invalid manifest entries, unreadable previous snapshots,
  and registry request failures mark that repo as `status: error` in
  `scan-manifest` without stopping other repos.
- Connector failures become review artifacts such as `connector_unavailable` or
  `connector_failed`.
- LLM provider failures and malformed model output become `llm_execution_error`
  or `llm_output_invalid` review artifacts.
- Low-confidence LLM candidates become `llm_low_confidence` artifacts instead
  of graph candidates.
- Possible duplicates are marked as conflicts and left for review instead of
  being silently merged.
|
||||
|
||||
## Rollout Dry Run
|
||||
|
||||
The first small local rollout ran on 2026-05-19:
|
||||
|
||||
```bash
railiance-fabric registry scan-manifest registry/local-repos.yaml \
  --repo-slug repo-scoping \
  --repo-slug llm-connect \
  --repo-slug railiance-fabric \
  --dry-run \
  --connector local-fabric-registry
```
|
||||
|
||||
Result:
|
||||
|
||||
- `repo-scoping`: 18 nodes, 17 edges, 13 attributes
- `llm-connect`: 5 nodes, 4 edges, 13 attributes
- `railiance-fabric`: 55 nodes, 63 edges, 13 attributes
- summary: 3 scanned, 0 changed, 0 retired, 0 conflicted, 3 LLM skipped,
  0 LLM failed, 0 accepted, 0 errors
|
||||
|
||||
Follow-up backlog from this first pass:
|
||||
|
||||
- Add a standard discovery snapshot directory, likely `.fabric-discovery/`, so
  repeated dry-runs can reconcile by default.
- Add a previous-from-registry option so manifest scans can diff against the
  latest stored discovery snapshot without exporting JSON first.
- Expand runtime/deployment extraction beyond local manifests to cover live
  server and deployment inventory connectors.
- Add review UI affordances for conflicts, tombstones, and bulk acceptance once
  enough repos have baseline snapshots.
- Define privacy and budget defaults before enabling non-mock LLM providers in
  multi-repo scans.
|
||||
|
||||
## Identity
|
||||
|
||||
Identity is the main safety boundary. The scanner must not append guesses on
|
||||
|
||||
@@ -126,6 +126,63 @@ def test_scan_cli_reconciles_against_previous_snapshot(tmp_path: Path, capsys) -
|
||||
assert any(tombstone["stable_key"] == vanished_key for tombstone in payload["tombstones"])
|
||||
|
||||
|
||||
def test_three_rescans_keep_stable_identity_and_retire_vanished_evidence(tmp_path: Path) -> None:
    """Rescan one fixture repo three times while its dependencies change.

    The fixture starts with a single dependency (PyYAML), gains ``requests``
    before the second scan, and loses it again before the third. Each later
    scan is reconciled against the previous result, and the test checks that:

    * stable keys are unique within every candidate collection of each result;
    * the reconciled snapshots validate against ``discovery-snapshot.schema.yaml``;
    * ``requests`` is reported as added in the second result and as retired,
      with a tombstone, in the third, while PyYAML is never reported as retired.
    """
    repo = tmp_path / "fixture-repo"
    repo.mkdir()
    (repo / "README.md").write_text("# Fixture Repo\n", encoding="utf-8")

    def scan_at(commit: str):
        """Scan the fixture repo as it currently exists on disk, labelled *commit*."""
        return scan_repo(
            ScanOptions(
                repo_path=repo,
                repo_slug="fixture-repo",
                repo_name="Fixture Repo",
                commit=commit,
            )
        )

    # Scan 1: baseline with a single dependency.
    _write_pyproject(repo, ["PyYAML>=6.0"])
    first = scan_at("commit-1")
    _assert_unique_candidate_keys(first)

    # Scan 2: a dependency appears and must show up as an added candidate.
    _write_pyproject(repo, ["PyYAML>=6.0", "requests>=2.31"])
    second = reconcile_discovery_snapshots(first, scan_at("commit-2"))
    _validate_schema("discovery-snapshot.schema.yaml", second)
    _assert_unique_candidate_keys(second)

    requests_key = discovery_stable_key("fixture-repo", "ExternalLibrary", "requests")
    pyyaml_key = discovery_stable_key("fixture-repo", "ExternalLibrary", "PyYAML")
    assert requests_key in second["reconciliation"]["diff"]["added"]
    assert requests_key in {node["stable_key"] for node in second["candidates"]["nodes"]}

    # Scan 3: the dependency vanishes again and must be retired, not dropped silently.
    _write_pyproject(repo, ["PyYAML>=6.0"])
    third = reconcile_discovery_snapshots(second, scan_at("commit-3"))
    _validate_schema("discovery-snapshot.schema.yaml", third)
    _assert_unique_candidate_keys(third)

    assert requests_key in third["reconciliation"]["diff"]["retired"]
    assert pyyaml_key not in third["reconciliation"]["diff"]["retired"]
    assert requests_key not in {node["stable_key"] for node in third["candidates"]["nodes"]}
    assert any(tombstone["stable_key"] == requests_key for tombstone in third["tombstones"])
|
||||
|
||||
|
||||
def _snapshot(
|
||||
*,
|
||||
replacement_scopes: list[dict[str, object]],
|
||||
@@ -206,6 +263,31 @@ def _anchor(source_kind: str, path: str) -> dict[str, object]:
|
||||
return anchor
|
||||
|
||||
|
||||
def _write_pyproject(repo: Path, dependencies: list[str]) -> None:
    """Write a minimal ``pyproject.toml`` for the fixture project into *repo*.

    Any existing ``pyproject.toml`` in *repo* is overwritten, which lets a test
    change the declared dependencies between scans.

    Args:
        repo: Directory that receives the file; it must already exist.
        dependencies: Requirement strings, written one per line inside the
            ``dependencies`` array in the order given.
    """
    dependency_lines = "\n".join(f' "{dependency}",' for dependency in dependencies)
    # The trailing empty entry produces the final newline of the file.
    content = "\n".join(
        [
            "[project]",
            'name = "fixture-service"',
            'version = "0.1.0"',
            "dependencies = [",
            dependency_lines,
            "]",
            "",
        ]
    )
    (repo / "pyproject.toml").write_text(content, encoding="utf-8")
|
||||
|
||||
|
||||
def _assert_unique_candidate_keys(snapshot: dict[str, object]) -> None:
    """Assert that no ``stable_key`` repeats within a candidate collection.

    The ``nodes``, ``edges`` and ``attributes`` lists under
    ``snapshot["candidates"]`` are checked independently, so the same key may
    appear in two different collections without failing.

    Args:
        snapshot: Discovery snapshot whose ``candidates`` mapping holds the
            three collections.

    Raises:
        AssertionError: If ``candidates`` is not a dict, a collection is not a
            list, or a collection contains a repeated ``stable_key``. The
            message names the collection and the repeated keys.
    """
    candidates = snapshot["candidates"]
    assert isinstance(candidates, dict)
    for collection_name in ("nodes", "edges", "attributes"):
        collection = candidates[collection_name]
        assert isinstance(collection, list)
        seen = set()
        duplicates = []
        for item in collection:
            stable_key = item["stable_key"]
            if stable_key in seen:
                duplicates.append(stable_key)
            seen.add(stable_key)
        # Report the offending keys so a failure is actionable without a debugger.
        assert not duplicates, (
            f"duplicate stable_key values in candidates[{collection_name!r}]: {duplicates!r}"
        )
|
||||
|
||||
|
||||
def _validate_schema(schema_name: str, payload: dict[str, object]) -> None:
    """Validate *payload* against the schema file ``schemas/<schema_name>``.

    Args:
        schema_name: File name of the schema inside the ``schemas`` directory.
        payload: Document to validate.

    Any error raised by the validator's ``validate`` call propagates to the
    caller and fails the test.
    """
    # NOTE(review): the schema path is relative, so it resolves against the
    # current working directory; this presumably expects the tests to be run
    # from the repository root. Confirm.
    validator = draft202012_validator(Path("schemas") / schema_name)
    validator.validate(payload)
|
||||
|
||||
@@ -4,7 +4,7 @@ type: workplan
|
||||
title: "Repo Reality Scanner"
|
||||
domain: railiance
|
||||
repo: railiance-fabric
|
||||
status: active
|
||||
status: finished
|
||||
owner: codex
|
||||
topic_slug: railiance
|
||||
planning_priority: high
|
||||
@@ -295,7 +295,7 @@ Acceptance notes:
|
||||
|
||||
```task
|
||||
id: RAIL-FAB-WP-0010-T08
|
||||
status: todo
|
||||
status: done
|
||||
priority: medium
|
||||
state_hub_task_id: "7a5b7dd7-92c6-4ac5-ae4d-6e73f75aac0d"
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user