From babbe88a460305f166ec756890f15f85286e95b3 Mon Sep 17 00:00:00 2001 From: tegwick Date: Tue, 19 May 2026 18:36:48 +0200 Subject: [PATCH] Close repo reality scanner rollout --- docs/repo-reality-scanner.md | 69 ++++++++++++++++ tests/test_reconciliation.py | 82 +++++++++++++++++++ .../RAIL-FAB-WP-0010-repo-reality-scanner.md | 4 +- 3 files changed, 153 insertions(+), 2 deletions(-) diff --git a/docs/repo-reality-scanner.md b/docs/repo-reality-scanner.md index 28eef60..dd951b4 100644 --- a/docs/repo-reality-scanner.md +++ b/docs/repo-reality-scanner.md @@ -258,6 +258,75 @@ After review, rerun with `--ingest` to store the snapshots. Add `--accept` only when candidates marked `review_state: accepted` should be projected into the registry graph. +## Scan Profiles And Review Workflow + +The initial profile is `deterministic`, which means repo-local extraction plus +any explicitly enabled offline connectors. Additional profiles should be named +for the evidence policy they represent, for example `deterministic-llm-draft` +or `catalog-followup`. Keep profile names stable because per-repo previous +snapshots use `<repo-slug>-<profile>.discovery.json`. + +Recommended workflow: + +1. Run `scan` or `registry scan-manifest` with `--dry-run`. +2. Reconcile with `--previous-snapshot` or `--previous-dir` when a prior + snapshot exists. +3. Review candidates with `review_state: needs_review`, `status: conflicted`, + tombstones, and review artifacts before accepting anything. +4. Store reviewed output with `registry ingest-discovery`. +5. Use `registry accept-discovery` or `registry scan-manifest --ingest --accept` + only for candidates whose review state is acceptable for projection. + +## Failure Modes + +Failures are captured close to the evidence source: + +- Missing repo paths, invalid manifest entries, unreadable previous snapshots, + and registry request failures mark that repo as `status: error` in + `scan-manifest` without stopping other repos. 
+- Connector failures become review artifacts such as `connector_unavailable` or + `connector_failed`. +- LLM provider failures and malformed model output become `llm_execution_error` + or `llm_output_invalid` review artifacts. +- Low-confidence LLM candidates become `llm_low_confidence` artifacts instead + of graph candidates. +- Possible duplicates are marked as conflicts and left for review instead of + being silently merged. + +## Rollout Dry Run + +The first small local rollout ran on 2026-05-19: + +```bash +railiance-fabric registry scan-manifest registry/local-repos.yaml \ + --repo-slug repo-scoping \ + --repo-slug llm-connect \ + --repo-slug railiance-fabric \ + --dry-run \ + --connector local-fabric-registry +``` + +Result: + +- `repo-scoping`: 18 nodes, 17 edges, 13 attributes +- `llm-connect`: 5 nodes, 4 edges, 13 attributes +- `railiance-fabric`: 55 nodes, 63 edges, 13 attributes +- summary: 3 scanned, 0 changed, 0 retired, 0 conflicted, 3 LLM skipped, + 0 LLM failed, 0 accepted, 0 errors + +Follow-up backlog from this first pass: + +- Add a standard discovery snapshot directory, likely `.fabric-discovery/`, so + repeated dry-runs can reconcile by default. +- Add a previous-from-registry option so manifest scans can diff against the + latest stored discovery snapshot without exporting JSON first. +- Expand runtime/deployment extraction beyond local manifests to cover live + server and deployment inventory connectors. +- Add review UI affordances for conflicts, tombstones, and bulk acceptance once + enough repos have baseline snapshots. +- Define privacy and budget defaults before enabling non-mock LLM providers in + multi-repo scans. + ## Identity Identity is the main safety boundary. 
The scanner must not append guesses on diff --git a/tests/test_reconciliation.py b/tests/test_reconciliation.py index 235de86..3e57b76 100644 --- a/tests/test_reconciliation.py +++ b/tests/test_reconciliation.py @@ -126,6 +126,63 @@ def test_scan_cli_reconciles_against_previous_snapshot(tmp_path: Path, capsys) - assert any(tombstone["stable_key"] == vanished_key for tombstone in payload["tombstones"]) +def test_three_rescans_keep_stable_identity_and_retire_vanished_evidence(tmp_path: Path) -> None: + repo = tmp_path / "fixture-repo" + repo.mkdir() + (repo / "README.md").write_text("# Fixture Repo\n", encoding="utf-8") + _write_pyproject(repo, ["PyYAML>=6.0"]) + + first = scan_repo( + ScanOptions( + repo_path=repo, + repo_slug="fixture-repo", + repo_name="Fixture Repo", + commit="commit-1", + ) + ) + _assert_unique_candidate_keys(first) + + _write_pyproject(repo, ["PyYAML>=6.0", "requests>=2.31"]) + second = reconcile_discovery_snapshots( + first, + scan_repo( + ScanOptions( + repo_path=repo, + repo_slug="fixture-repo", + repo_name="Fixture Repo", + commit="commit-2", + ) + ), + ) + _validate_schema("discovery-snapshot.schema.yaml", second) + _assert_unique_candidate_keys(second) + + requests_key = discovery_stable_key("fixture-repo", "ExternalLibrary", "requests") + pyyaml_key = discovery_stable_key("fixture-repo", "ExternalLibrary", "PyYAML") + assert requests_key in second["reconciliation"]["diff"]["added"] + assert requests_key in {node["stable_key"] for node in second["candidates"]["nodes"]} + + _write_pyproject(repo, ["PyYAML>=6.0"]) + third = reconcile_discovery_snapshots( + second, + scan_repo( + ScanOptions( + repo_path=repo, + repo_slug="fixture-repo", + repo_name="Fixture Repo", + commit="commit-3", + ) + ), + ) + _validate_schema("discovery-snapshot.schema.yaml", third) + _assert_unique_candidate_keys(third) + + assert requests_key in third["reconciliation"]["diff"]["retired"] + assert pyyaml_key not in third["reconciliation"]["diff"]["retired"] + assert 
requests_key not in {node["stable_key"] for node in third["candidates"]["nodes"]} + assert any(tombstone["stable_key"] == requests_key for tombstone in third["tombstones"]) + + def _snapshot( *, replacement_scopes: list[dict[str, object]], @@ -206,6 +263,31 @@ def _anchor(source_kind: str, path: str) -> dict[str, object]: return anchor +def _write_pyproject(repo: Path, dependencies: list[str]) -> None: + dependency_lines = "\n".join(f' "{dependency}",' for dependency in dependencies) + (repo / "pyproject.toml").write_text( + f""" +[project] +name = "fixture-service" +version = "0.1.0" +dependencies = [ +{dependency_lines} +] +""".lstrip(), + encoding="utf-8", + ) + + +def _assert_unique_candidate_keys(snapshot: dict[str, object]) -> None: + candidates = snapshot["candidates"] + assert isinstance(candidates, dict) + for collection_name in ("nodes", "edges", "attributes"): + collection = candidates[collection_name] + assert isinstance(collection, list) + stable_keys = [item["stable_key"] for item in collection] + assert len(stable_keys) == len(set(stable_keys)) + + def _validate_schema(schema_name: str, payload: dict[str, object]) -> None: validator = draft202012_validator(Path("schemas") / schema_name) validator.validate(payload) diff --git a/workplans/RAIL-FAB-WP-0010-repo-reality-scanner.md b/workplans/RAIL-FAB-WP-0010-repo-reality-scanner.md index 1f48c9e..96a889b 100644 --- a/workplans/RAIL-FAB-WP-0010-repo-reality-scanner.md +++ b/workplans/RAIL-FAB-WP-0010-repo-reality-scanner.md @@ -4,7 +4,7 @@ type: workplan title: "Repo Reality Scanner" domain: railiance repo: railiance-fabric -status: active +status: finished owner: codex topic_slug: railiance planning_priority: high @@ -295,7 +295,7 @@ Acceptance notes: ```task id: RAIL-FAB-WP-0010-T08 -status: todo +status: done priority: medium state_hub_task_id: "7a5b7dd7-92c6-4ac5-ae4d-6e73f75aac0d" ```