Close repo reality scanner rollout

2026-05-19 18:36:48 +02:00
parent be7252019f
commit babbe88a46
3 changed files with 153 additions and 2 deletions
--- a/docs/repo-reality-scanner.md
+++ b/docs/repo-reality-scanner.md
@@ -258,6 +258,75 @@ After review, rerun with `--ingest` to store the snapshots. Add `--accept` only
 when candidates marked `review_state: accepted` should be projected into the
 registry graph.

+## Scan Profiles And Review Workflow
+
+The initial profile is `deterministic`, which means repo-local extraction plus
+any explicitly enabled offline connectors. Additional profiles should be named
+for the evidence policy they represent, for example `deterministic-llm-draft`
+or `catalog-followup`. Keep profile names stable because per-repo previous
+snapshots are named `<slug>-<profile>.discovery.json`.
+
+Recommended workflow:
+
+1. Run `scan` or `registry scan-manifest` with `--dry-run`.
+2. Reconcile with `--previous-snapshot` or `--previous-dir` when a prior
+   snapshot exists.
+3. Review candidates with `review_state: needs_review`, `status: conflicted`,
+   tombstones, and review artifacts before accepting anything.
+4. Store reviewed output with `registry ingest-discovery`.
+5. Use `registry accept-discovery` or `registry scan-manifest --ingest --accept`
+   only for candidates whose review state is acceptable for projection.
+
+## Failure Modes
+
+Failures are captured close to the evidence source:
+
+- Missing repo paths, invalid manifest entries, unreadable previous snapshots,
+  and registry request failures mark that repo as `status: error` in
+  `scan-manifest` without stopping other repos.
+- Connector failures become review artifacts such as `connector_unavailable` or
+  `connector_failed`.
+- LLM provider failures and malformed model output become `llm_execution_error`
+  or `llm_output_invalid` review artifacts.
+- Low-confidence LLM candidates become `llm_low_confidence` artifacts instead
+  of graph candidates.
+- Possible duplicates are marked as conflicts and left for review instead of
+  being silently merged.
+
+## Rollout Dry Run
+
+The first small local rollout ran on 2026-05-19:
+
+```bash
+railiance-fabric registry scan-manifest registry/local-repos.yaml \
+  --repo-slug repo-scoping \
+  --repo-slug llm-connect \
+  --repo-slug railiance-fabric \
+  --dry-run \
+  --connector local-fabric-registry
+```
+
+Result:
+
+- `repo-scoping`: 18 nodes, 17 edges, 13 attributes
+- `llm-connect`: 5 nodes, 4 edges, 13 attributes
+- `railiance-fabric`: 55 nodes, 63 edges, 13 attributes
+- summary: 3 scanned, 0 changed, 0 retired, 0 conflicted, 3 LLM skipped,
+  0 LLM failed, 0 accepted, 0 errors
+
+Follow-up backlog from this first pass:
+
+- Add a standard discovery snapshot directory, likely `.fabric-discovery/`, so
+  repeated dry-runs can reconcile by default.
+- Add a previous-from-registry option so manifest scans can diff against the
+  latest stored discovery snapshot without exporting JSON first.
+- Expand runtime/deployment extraction beyond local manifests to cover live
+  server and deployment inventory connectors.
+- Add review UI affordances for conflicts, tombstones, and bulk acceptance once
+  enough repos have baseline snapshots.
+- Define privacy and budget defaults before enabling non-mock LLM providers in
+  multi-repo scans.
+
 ## Identity

 Identity is the main safety boundary. The scanner must not append guesses on
--- a/tests/test_reconciliation.py
+++ b/tests/test_reconciliation.py
@@ -126,6 +126,63 @@ def test_scan_cli_reconciles_against_previous_snapshot(tmp_path: Path, capsys) -
    assert any(tombstone["stable_key"] == vanished_key for tombstone in payload["tombstones"])


+def test_three_rescans_keep_stable_identity_and_retire_vanished_evidence(tmp_path: Path) -> None:
+    repo = tmp_path / "fixture-repo"
+    repo.mkdir()
+    (repo / "README.md").write_text("# Fixture Repo\n", encoding="utf-8")
+    _write_pyproject(repo, ["PyYAML>=6.0"])
+
+    first = scan_repo(
+        ScanOptions(
+            repo_path=repo,
+            repo_slug="fixture-repo",
+            repo_name="Fixture Repo",
+            commit="commit-1",
+        )
+    )
+    _assert_unique_candidate_keys(first)
+
+    _write_pyproject(repo, ["PyYAML>=6.0", "requests>=2.31"])
+    second = reconcile_discovery_snapshots(
+        first,
+        scan_repo(
+            ScanOptions(
+                repo_path=repo,
+                repo_slug="fixture-repo",
+                repo_name="Fixture Repo",
+                commit="commit-2",
+            )
+        ),
+    )
+    _validate_schema("discovery-snapshot.schema.yaml", second)
+    _assert_unique_candidate_keys(second)
+
+    requests_key = discovery_stable_key("fixture-repo", "ExternalLibrary", "requests")
+    pyyaml_key = discovery_stable_key("fixture-repo", "ExternalLibrary", "PyYAML")
+    assert requests_key in second["reconciliation"]["diff"]["added"]
+    assert requests_key in {node["stable_key"] for node in second["candidates"]["nodes"]}
+
+    _write_pyproject(repo, ["PyYAML>=6.0"])
+    third = reconcile_discovery_snapshots(
+        second,
+        scan_repo(
+            ScanOptions(
+                repo_path=repo,
+                repo_slug="fixture-repo",
+                repo_name="Fixture Repo",
+                commit="commit-3",
+            )
+        ),
+    )
+    _validate_schema("discovery-snapshot.schema.yaml", third)
+    _assert_unique_candidate_keys(third)
+
+    assert requests_key in third["reconciliation"]["diff"]["retired"]
+    assert pyyaml_key not in third["reconciliation"]["diff"]["retired"]
+    assert requests_key not in {node["stable_key"] for node in third["candidates"]["nodes"]}
+    assert any(tombstone["stable_key"] == requests_key for tombstone in third["tombstones"])
+
+
 def _snapshot(
    *,
    replacement_scopes: list[dict[str, object]],
@@ -206,6 +263,31 @@ def _anchor(source_kind: str, path: str) -> dict[str, object]:
    return anchor


+def _write_pyproject(repo: Path, dependencies: list[str]) -> None:
+    dependency_lines = "\n".join(f'  "{dependency}",' for dependency in dependencies)
+    (repo / "pyproject.toml").write_text(
+        f"""
+[project]
+name = "fixture-service"
+version = "0.1.0"
+dependencies = [
+{dependency_lines}
+]
+""".lstrip(),
+        encoding="utf-8",
+    )
+
+
+def _assert_unique_candidate_keys(snapshot: dict[str, object]) -> None:
+    candidates = snapshot["candidates"]
+    assert isinstance(candidates, dict)
+    for collection_name in ("nodes", "edges", "attributes"):
+        collection = candidates[collection_name]
+        assert isinstance(collection, list)
+        stable_keys = [item["stable_key"] for item in collection]
+        assert len(stable_keys) == len(set(stable_keys))
+
+
 def _validate_schema(schema_name: str, payload: dict[str, object]) -> None:
    validator = draft202012_validator(Path("schemas") / schema_name)
    validator.validate(payload)
--- a/workplans/RAIL-FAB-WP-0010-repo-reality-scanner.md
+++ b/workplans/RAIL-FAB-WP-0010-repo-reality-scanner.md
@@ -4,7 +4,7 @@ type: workplan
 title: "Repo Reality Scanner"
 domain: railiance
 repo: railiance-fabric
-status: active
+status: finished
 owner: codex
 topic_slug: railiance
 planning_priority: high
@@ -295,7 +295,7 @@ Acceptance notes:

 ```task
 id: RAIL-FAB-WP-0010-T08
-status: todo
+status: done
 priority: medium
 state_hub_task_id: "7a5b7dd7-92c6-4ac5-ae4d-6e73f75aac0d"
 ```