Close repo reality scanner rollout

This commit is contained in:
2026-05-19 18:36:48 +02:00
parent be7252019f
commit babbe88a46
3 changed files with 153 additions and 2 deletions

View File

@@ -258,6 +258,75 @@ After review, rerun with `--ingest` to store the snapshots. Add `--accept` only
when candidates marked `review_state: accepted` should be projected into the
registry graph.
## Scan Profiles And Review Workflow
The initial profile is `deterministic`, which means repo-local extraction plus
any explicitly enabled offline connectors. Additional profiles should be named
for the evidence policy they represent, for example `deterministic-llm-draft`
or `catalog-followup`. Keep profile names stable because per-repo previous
snapshots use `<slug>-<profile>.discovery.json`.
Recommended workflow:
1. Run `scan` or `registry scan-manifest` with `--dry-run`.
2. Reconcile with `--previous-snapshot` or `--previous-dir` when a prior
snapshot exists.
3. Review candidates with `review_state: needs_review`, `status: conflicted`,
tombstones, and review artifacts before accepting anything.
4. Store reviewed output with `registry ingest-discovery`.
5. Use `registry accept-discovery` or `registry scan-manifest --ingest --accept`
only once the candidates to be projected into the registry graph are marked
`review_state: accepted`.
## Failure Modes
Failures are captured close to the evidence source:
- Missing repo paths, invalid manifest entries, unreadable previous snapshots,
and registry request failures mark that repo as `status: error` in
`scan-manifest` without stopping other repos.
- Connector failures become review artifacts such as `connector_unavailable` or
`connector_failed`.
- LLM provider failures and malformed model output become `llm_execution_error`
or `llm_output_invalid` review artifacts.
- Low-confidence LLM candidates become `llm_low_confidence` artifacts instead
of graph candidates.
- Possible duplicates are marked as conflicts and left for review instead of
being silently merged.
## Rollout Dry Run
The first small local rollout ran on 2026-05-19:
```bash
railiance-fabric registry scan-manifest registry/local-repos.yaml \
--repo-slug repo-scoping \
--repo-slug llm-connect \
--repo-slug railiance-fabric \
--dry-run \
--connector local-fabric-registry
```
Result:
- `repo-scoping`: 18 nodes, 17 edges, 13 attributes
- `llm-connect`: 5 nodes, 4 edges, 13 attributes
- `railiance-fabric`: 55 nodes, 63 edges, 13 attributes
- summary: 3 scanned, 0 changed, 0 retired, 0 conflicted, 3 LLM skipped,
0 LLM failed, 0 accepted, 0 errors
Follow-up backlog from this first pass:
- Add a standard discovery snapshot directory, likely `.fabric-discovery/`, so
repeated dry-runs can reconcile by default.
- Add a previous-from-registry option so manifest scans can diff against the
latest stored discovery snapshot without exporting JSON first.
- Expand runtime/deployment extraction beyond local manifests to cover live
server and deployment inventory connectors.
- Add review UI affordances for conflicts, tombstones, and bulk acceptance once
enough repos have baseline snapshots.
- Define privacy and budget defaults before enabling non-mock LLM providers in
multi-repo scans.
## Identity
Identity is the main safety boundary. The scanner must not append guesses on

View File

@@ -126,6 +126,63 @@ def test_scan_cli_reconciles_against_previous_snapshot(tmp_path: Path, capsys) -
assert any(tombstone["stable_key"] == vanished_key for tombstone in payload["tombstones"])
def test_three_rescans_keep_stable_identity_and_retire_vanished_evidence(tmp_path: Path) -> None:
    """Scan a fixture repo three times while a dependency is added, then removed.

    Scan 1 sees only PyYAML, scan 2 adds ``requests``, and scan 3 drops it
    again. Scans 2 and 3 are reconciled against the preceding snapshot, and the
    test checks that the ``requests`` key is reported as added, then as retired
    with a tombstone, while the PyYAML key is never retired.
    """
    fixture_root = tmp_path / "fixture-repo"
    fixture_root.mkdir()
    readme = fixture_root / "README.md"
    readme.write_text("# Fixture Repo\n", encoding="utf-8")

    def scan_at(commit: str):
        # Every scan targets the same repo identity; only the commit label varies.
        options = ScanOptions(
            repo_path=fixture_root,
            repo_slug="fixture-repo",
            repo_name="Fixture Repo",
            commit=commit,
        )
        return scan_repo(options)

    def node_keys(snapshot) -> set:
        # Stable keys of all node candidates present in a snapshot.
        return {node["stable_key"] for node in snapshot["candidates"]["nodes"]}

    # --- Scan 1: baseline with a single dependency -------------------------
    _write_pyproject(fixture_root, ["PyYAML>=6.0"])
    first = scan_at("commit-1")
    _assert_unique_candidate_keys(first)

    # --- Scan 2: a second dependency appears -------------------------------
    _write_pyproject(fixture_root, ["PyYAML>=6.0", "requests>=2.31"])
    second = reconcile_discovery_snapshots(first, scan_at("commit-2"))
    _validate_schema("discovery-snapshot.schema.yaml", second)
    _assert_unique_candidate_keys(second)

    requests_key = discovery_stable_key("fixture-repo", "ExternalLibrary", "requests")
    pyyaml_key = discovery_stable_key("fixture-repo", "ExternalLibrary", "PyYAML")
    added_in_second = second["reconciliation"]["diff"]["added"]
    assert requests_key in added_in_second
    assert requests_key in node_keys(second)

    # --- Scan 3: the second dependency vanishes again ----------------------
    _write_pyproject(fixture_root, ["PyYAML>=6.0"])
    third = reconcile_discovery_snapshots(second, scan_at("commit-3"))
    _validate_schema("discovery-snapshot.schema.yaml", third)
    _assert_unique_candidate_keys(third)

    retired_in_third = third["reconciliation"]["diff"]["retired"]
    assert requests_key in retired_in_third
    assert pyyaml_key not in retired_in_third
    assert requests_key not in node_keys(third)
    tombstoned_keys = [tombstone["stable_key"] for tombstone in third["tombstones"]]
    assert requests_key in tombstoned_keys
def _snapshot(
*,
replacement_scopes: list[dict[str, object]],
@@ -206,6 +263,31 @@ def _anchor(source_kind: str, path: str) -> dict[str, object]:
return anchor
def _write_pyproject(repo: Path, dependencies: list[str]) -> None:
    """Write (or overwrite) ``pyproject.toml`` in *repo* for the fixture project.

    The file declares a ``fixture-service`` project at version ``0.1.0`` whose
    ``dependencies`` array contains one quoted entry per item of
    *dependencies*, in the order given.
    """
    quoted_entries = [f' "{dependency}",' for dependency in dependencies]
    dependency_lines = "\n".join(quoted_entries)
    # The template opens with a newline for readability; lstrip() removes it so
    # the file starts directly with the [project] table header.
    document = f"""
[project]
name = "fixture-service"
version = "0.1.0"
dependencies = [
{dependency_lines}
]
""".lstrip()
    target = repo / "pyproject.toml"
    target.write_text(document, encoding="utf-8")
def _assert_unique_candidate_keys(snapshot: dict[str, object]) -> None:
    """Assert that no candidate collection in *snapshot* repeats a ``stable_key``.

    Checks ``snapshot["candidates"]["nodes"]``, ``["edges"]`` and
    ``["attributes"]`` independently; the same key may appear in two different
    collections, but not twice within one.

    Raises:
        AssertionError: if ``candidates`` is not a dict, a collection is not a
            list, or a collection contains a duplicated ``stable_key``. The
            message names the collection and the duplicated keys so a failing
            test points straight at the offending candidates.
    """
    candidates = snapshot["candidates"]
    assert isinstance(candidates, dict), (
        f"snapshot['candidates'] must be a dict, got {type(candidates).__name__}"
    )
    for collection_name in ("nodes", "edges", "attributes"):
        collection = candidates[collection_name]
        assert isinstance(collection, list), (
            f"candidates[{collection_name!r}] must be a list, "
            f"got {type(collection).__name__}"
        )
        seen = set()
        duplicates = []
        for item in collection:
            stable_key = item["stable_key"]
            # Record each repeated key once, in first-repeat order.
            if stable_key in seen and stable_key not in duplicates:
                duplicates.append(stable_key)
            seen.add(stable_key)
        assert not duplicates, (
            f"duplicate stable_key values in candidates[{collection_name!r}]: "
            f"{duplicates!r}"
        )
def _validate_schema(schema_name: str, payload: dict[str, object]) -> None:
    """Validate *payload* against the schema file ``schemas/<schema_name>``.

    The schema path is relative, so it resolves against the current working
    directory. ``draft202012_validator`` is defined outside this block;
    presumably it builds a JSON Schema 2020-12 validator whose ``validate``
    raises on an invalid payload — confirm against its definition.
    """
    schema_path = Path("schemas") / schema_name
    draft202012_validator(schema_path).validate(payload)

View File

@@ -4,7 +4,7 @@ type: workplan
title: "Repo Reality Scanner"
domain: railiance
repo: railiance-fabric
status: active
status: finished
owner: codex
topic_slug: railiance
planning_priority: high
@@ -295,7 +295,7 @@ Acceptance notes:
```task
id: RAIL-FAB-WP-0010-T08
status: todo
status: done
priority: medium
state_hub_task_id: "7a5b7dd7-92c6-4ac5-ae4d-6e73f75aac0d"
```