From 17356a41d6da70c98e21d9165409e91815746811 Mon Sep 17 00:00:00 2001 From: tegwick Date: Tue, 19 May 2026 04:49:08 +0200 Subject: [PATCH] Add discovery reconciliation engine --- docs/repo-reality-scanner.md | 40 ++ railiance_fabric/cli.py | 22 ++ railiance_fabric/reconciliation.py | 372 ++++++++++++++++++ schemas/discovery-snapshot.schema.yaml | 29 ++ tests/test_reconciliation.py | 211 ++++++++++ .../RAIL-FAB-WP-0010-repo-reality-scanner.md | 2 +- 6 files changed, 675 insertions(+), 1 deletion(-) create mode 100644 railiance_fabric/reconciliation.py create mode 100644 tests/test_reconciliation.py diff --git a/docs/repo-reality-scanner.md b/docs/repo-reality-scanner.md index 08d80b2..3e9a675 100644 --- a/docs/repo-reality-scanner.md +++ b/docs/repo-reality-scanner.md @@ -91,6 +91,46 @@ candidates. Unresolved edge endpoints or attribute targets also become review artifacts. Accepted graph data still requires deterministic evidence, repo-owned declarations, or a later human review/acceptance path. +## Reconciliation And Dry-Run Diffs + +Scans can be reconciled against a previous discovery snapshot: + +```bash +railiance-fabric scan . \ + --repo-slug railiance-fabric \ + --previous-snapshot previous-discovery.json \ + --dry-run \ + --output current-discovery.json +``` + +The reconciler writes `reconciliation.diff` with explicit stable-key sets: + +- `added` +- `changed` +- `retired` +- `conflicted` + +It deduplicates candidates by stable key, merges source anchors and provenance, +and applies source-aware precedence when duplicate candidates disagree. The +current precedence is: + +1. `repo_declaration` +2. `deterministic` +3. `catalog` +4. `registry` +5. `llm` +6. `manual` + +Possible duplicates found through matching aliases, normalized labels, +relationship endpoints, or attribute targets are not silently merged. They are +marked `status: conflicted`, moved to `review_state: needs_review`, and listed +under `reconciliation.conflicts`. + +Missing previous candidates become tombstones only when their replacement scope +is present in the current scan and has `mode: replacement`. Missing candidates +from additive scopes, such as broad LLM evidence bundles, are left alone. +Existing tombstones are preserved so repeated scans can explain graph drift. + ## Identity Identity is the main safety boundary. The scanner must not append guesses on diff --git a/railiance_fabric/cli.py b/railiance_fabric/cli.py index 2a2b8d0..106e4c3 100644 --- a/railiance_fabric/cli.py +++ b/railiance_fabric/cli.py @@ -14,6 +14,7 @@ from .loader import declaration_files, load_yaml from .graph import FabricGraph, build_graph from .graph_explorer import fabric_graph_explorer_payload from .llm_extraction import LLMExtractionConfig +from .reconciliation import reconcile_discovery_snapshots from .scanner import ScanOptions, scan_repo from .validation import validate_roots @@ -73,6 +74,7 @@ def build_parser() -> argparse.ArgumentParser: scan.add_argument("--profile", default="deterministic") scan.add_argument("--dry-run", action="store_true", help="Do not write anywhere except an explicit --output file.") scan.add_argument("--output", type=Path, default=None, help="Write the discovery snapshot JSON to a file.") + scan.add_argument("--previous-snapshot", type=Path, default=None, help="Reconcile against a previous discovery snapshot JSON.") scan.add_argument("--json", action="store_true", help="Print the discovery snapshot JSON to stdout.") scan.add_argument("--llm", action="store_true", help="Enable llm-connect assisted extraction.") scan.add_argument("--llm-provider", default="mock", help="llm-connect provider name.") @@ -410,6 +412,16 @@ def _scan_repo(args: argparse.Namespace) -> int: ), ) ) + if args.previous_snapshot: + try: + previous = json.loads(args.previous_snapshot.read_text(encoding="utf-8")) + except Exception as exc: + print(f"ERROR {args.previous_snapshot}: cannot read previous snapshot: {exc}", file=sys.stderr) + return 1 + if not isinstance(previous, dict): + print(f"ERROR {args.previous_snapshot}: previous snapshot must be a JSON object", file=sys.stderr) + return 1 + snapshot = reconcile_discovery_snapshots(previous, snapshot) payload = json.dumps(snapshot, indent=2, sort_keys=True) if args.output: args.output.parent.mkdir(parents=True, exist_ok=True) @@ -421,6 +433,15 @@ def _scan_repo(args: argparse.Namespace) -> int: candidates = snapshot["candidates"] review_count = len(snapshot.get("review_artifacts", [])) review_summary = f", {review_count} review artifact(s)" if review_count else "" + diff = snapshot.get("reconciliation", {}).get("diff", {}) + diff_summary = "" + if isinstance(diff, dict): + diff_summary = ( + f", diff +{len(diff.get('added', []))}" + f"/~{len(diff.get('changed', []))}" + f"/-{len(diff.get('retired', []))}" + f"/!{len(diff.get('conflicted', []))}" + ) mode = "dry-run " if args.dry_run else "" print( f"{mode}scan {snapshot['source']['repo_slug']} " @@ -430,6 +451,7 @@ def _scan_repo(args: argparse.Namespace) -> int: f"{len(candidates['attributes'])} attribute(s), " f"{len(snapshot['replacement_scopes'])} replacement scope(s)" f"{review_summary}" + f"{diff_summary}" ) if args.output: print(f"wrote {args.output}") diff --git a/railiance_fabric/reconciliation.py b/railiance_fabric/reconciliation.py new file mode 100644 index 0000000..a893ffd --- /dev/null +++ b/railiance_fabric/reconciliation.py @@ -0,0 +1,372 @@ +from __future__ import annotations + +import json +from datetime import datetime, timezone +from typing import Any, Iterable + +from .discovery import normalize_identity_part, short_fingerprint + + +ORIGIN_PRECEDENCE = { + "repo_declaration": 0, + "deterministic": 1, + "catalog": 2, + "registry": 3, + "llm": 4, + "manual": 5, +} + + +def reconcile_discovery_snapshots( + previous: dict[str, Any] | None, + current: dict[str, Any], + *, + retired_at: str | None = None, +) -> dict[str, Any]: + """Merge, deduplicate, diff, and tombstone discovery candidates.""" + + result = _copy_json(current) + retired_at = retired_at or _utc_now() + previous = previous or {} + conflicts: list[dict[str, object]] = [] + diff = {"added": set(), "changed": set(), "retired": set(), "conflicted": set()} + replacement_scopes = _scope_modes(result) + result_candidates = result.setdefault("candidates", {"nodes": [], "edges": [], "attributes": []}) + + for collection in ("nodes", "edges", "attributes"): + previous_map = _candidate_map(previous, collection) + current_items = [ + item for item in result_candidates.get(collection, []) + if isinstance(item, dict) and item.get("stable_key") + ] + deduped, collection_conflicts = _dedupe_collection(collection, current_items) + conflicts.extend(collection_conflicts) + for conflict in collection_conflicts: + for key in conflict.get("candidates", []): + diff["conflicted"].add(str(key)) + + result_candidates[collection] = [deduped[key] for key in sorted(deduped)] + current_keys = set(deduped) + previous_keys = set(previous_map) + diff["added"].update(current_keys - previous_keys) + for key in current_keys & previous_keys: + if _candidate_fingerprint(deduped[key]) != _candidate_fingerprint(previous_map[key]): + diff["changed"].add(key) + for key in previous_keys - current_keys: + previous_candidate = previous_map[key] + scope_id = str(previous_candidate.get("replacement_scope") or "") + if replacement_scopes.get(scope_id) != "replacement": + continue + diff["retired"].add(key) + _append_tombstone( + result, + stable_key=key, + entity_kind=_entity_kind(collection), + replacement_scope=scope_id, + retired_at=retired_at, + previous_candidate=previous_candidate, + ) + + existing_conflicts = result.setdefault("reconciliation", {}).get("conflicts", []) + if not isinstance(existing_conflicts, list): + existing_conflicts = [] + result["reconciliation"]["conflicts"] = [*existing_conflicts, *conflicts] + result["reconciliation"]["diff"] = { + key: sorted(values) + for key, values in diff.items() + } + result["tombstones"] = _dedupe_tombstones([ + *list(previous.get("tombstones", []) if isinstance(previous.get("tombstones"), list) else []), + *list(result.get("tombstones", []) if isinstance(result.get("tombstones"), list) else []), + ]) + return result + + +def _dedupe_collection( + collection: str, + items: list[dict[str, Any]], +) -> tuple[dict[str, dict[str, Any]], list[dict[str, object]]]: + by_key: dict[str, dict[str, Any]] = {} + conflicts: list[dict[str, object]] = [] + for item in items: + key = str(item["stable_key"]) + by_key[key], merge_conflicts = _merge_candidate(collection, by_key.get(key), item) + conflicts.extend(merge_conflicts) + + if collection == "nodes": + conflicts.extend(_node_conflicts(by_key)) + elif collection == "edges": + conflicts.extend(_edge_conflicts(by_key)) + elif collection == "attributes": + conflicts.extend(_attribute_conflicts(by_key)) + + conflicted_keys = { + str(key) + for conflict in conflicts + for key in conflict.get("candidates", []) + } + for key in conflicted_keys: + candidate = by_key.get(key) + if candidate is not None: + candidate["status"] = "conflicted" + candidate["review_state"] = "needs_review" + return by_key, conflicts + + +def _merge_candidate( + collection: str, + existing: dict[str, Any] | None, + incoming: dict[str, Any], +) -> tuple[dict[str, Any], list[dict[str, object]]]: + if existing is None: + return _copy_json(incoming), [] + + winner, loser = _higher_precedence(existing, incoming) + merged = _copy_json(winner) + conflicts: list[dict[str, object]] = [] + for field in ("aliases", "provenance", "source_anchors"): + values = [*list(existing.get(field, [])), *list(incoming.get(field, []))] + if values: + merged[field] = _unique_json(values) if field != "aliases" else _unique_strings(values) + if isinstance(existing.get("confidence"), (int, float)) and isinstance(incoming.get("confidence"), (int, float)): + merged["confidence"] = max(float(existing["confidence"]), float(incoming["confidence"])) + + if collection == "attributes": + if existing.get("value") != incoming.get("value"): + merged["value"] = winner.get("value") + conflicts.append(_conflict("attribute_value_conflict", [existing, incoming], "candidate attribute values differ")) + return merged, conflicts + + existing_attrs = existing.get("attributes") if isinstance(existing.get("attributes"), dict) else {} + incoming_attrs = incoming.get("attributes") if isinstance(incoming.get("attributes"), dict) else {} + merged_attrs = dict(existing_attrs) + for key, value in incoming_attrs.items(): + if key not in merged_attrs: + merged_attrs[key] = value + continue + if merged_attrs[key] == value: + continue + selected = winner.get("attributes", {}).get(key) if isinstance(winner.get("attributes"), dict) else value + merged_attrs[key] = selected + conflicts.append(_conflict("attribute_conflict", [existing, incoming], f"attribute {key!r} differs")) + if merged_attrs: + loser_attrs = loser.get("attributes") if isinstance(loser.get("attributes"), dict) else {} + for key, value in loser_attrs.items(): + merged_attrs.setdefault(key, value) + merged["attributes"] = merged_attrs + return merged, conflicts + + +def _node_conflicts(by_key: dict[str, dict[str, Any]]) -> list[dict[str, object]]: + conflicts: list[dict[str, object]] = [] + seen: dict[tuple[str, str], str] = {} + for key, node in sorted(by_key.items()): + kind = str(node.get("kind") or "") + labels = _node_identity_labels(node) + for label in labels: + match_key = (normalize_identity_part(kind), normalize_identity_part(label)) + other = seen.get(match_key) + if other and other != key: + conflicts.append( + { + "type": "possible_duplicate_node", + "reason": "same kind with matching normalized label or alias", + "candidates": sorted([other, key]), + "match": {"kind": kind, "label": label}, + } + ) + seen.setdefault(match_key, key) + return _unique_conflicts(conflicts) + + +def _edge_conflicts(by_key: dict[str, dict[str, Any]]) -> list[dict[str, object]]: + conflicts: list[dict[str, object]] = [] + seen: dict[tuple[str, str, str], str] = {} + for key, edge in sorted(by_key.items()): + match_key = ( + str(edge.get("source_key") or ""), + str(edge.get("edge_type") or ""), + str(edge.get("target_key") or ""), + ) + other = seen.get(match_key) + if other and other != key: + conflicts.append( + { + "type": "possible_duplicate_edge", + "reason": "same source, target, and edge type", + "candidates": sorted([other, key]), + "match": { + "source_key": match_key[0], + "edge_type": match_key[1], + "target_key": match_key[2], + }, + } + ) + seen.setdefault(match_key, key) + return _unique_conflicts(conflicts) + + +def _attribute_conflicts(by_key: dict[str, dict[str, Any]]) -> list[dict[str, object]]: + conflicts: list[dict[str, object]] = [] + seen: dict[tuple[str, str], str] = {} + for key, attribute in sorted(by_key.items()): + match_key = (str(attribute.get("entity_key") or ""), str(attribute.get("name") or "")) + other = seen.get(match_key) + if other and other != key: + conflicts.append( + { + "type": "possible_duplicate_attribute", + "reason": "same entity and attribute name", + "candidates": sorted([other, key]), + "match": {"entity_key": match_key[0], "name": match_key[1]}, + } + ) + seen.setdefault(match_key, key) + return _unique_conflicts(conflicts) + + +def _append_tombstone( + snapshot: dict[str, Any], + *, + stable_key: str, + entity_kind: str, + replacement_scope: str, + retired_at: str, + previous_candidate: dict[str, Any], +) -> None: + tombstones = snapshot.setdefault("tombstones", []) + tombstones.append( + { + "stable_key": stable_key, + "entity_kind": entity_kind, + "replacement_scope": replacement_scope, + "retired_at": retired_at, + "reason": "source_missing", + "previous_candidate": previous_candidate, + } + ) + + +def _candidate_map(snapshot: dict[str, Any], collection: str) -> dict[str, dict[str, Any]]: + candidates = snapshot.get("candidates") if isinstance(snapshot.get("candidates"), dict) else {} + items = candidates.get(collection) if isinstance(candidates.get(collection), list) else [] + return { + str(item["stable_key"]): item + for item in items + if isinstance(item, dict) and item.get("stable_key") + } + + +def _scope_modes(snapshot: dict[str, Any]) -> dict[str, str]: + scopes = snapshot.get("replacement_scopes") if isinstance(snapshot.get("replacement_scopes"), list) else [] + return { + str(scope.get("id")): str(scope.get("mode") or "") + for scope in scopes + if isinstance(scope, dict) and scope.get("id") + } + + +def _higher_precedence(left: dict[str, Any], right: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]: + left_rank = ORIGIN_PRECEDENCE.get(str(left.get("origin") or ""), 99) + right_rank = ORIGIN_PRECEDENCE.get(str(right.get("origin") or ""), 99) + if right_rank < left_rank: + return right, left + return left, right + + +def _node_identity_labels(node: dict[str, Any]) -> list[str]: + return _unique_strings([ + node.get("label") or "", + node.get("graph_id") or "", + *_as_list(node.get("aliases")), + ]) + + +def _candidate_fingerprint(candidate: dict[str, Any]) -> str: + ignored = {"provenance"} + stable = { + key: value + for key, value in candidate.items() + if key not in ignored + } + return short_fingerprint(stable, length=20) + + +def _conflict(conflict_type: str, candidates: list[dict[str, Any]], reason: str) -> dict[str, object]: + return { + "type": conflict_type, + "reason": reason, + "candidates": sorted(str(candidate.get("stable_key") or "") for candidate in candidates), + } + + +def _unique_conflicts(conflicts: Iterable[dict[str, object]]) -> list[dict[str, object]]: + seen: set[str] = set() + result: list[dict[str, object]] = [] + for conflict in conflicts: + key = json.dumps(conflict, sort_keys=True, default=str) + if key in seen: + continue + seen.add(key) + result.append(conflict) + return result + + +def _dedupe_tombstones(tombstones: list[object]) -> list[dict[str, Any]]: + by_key: dict[str, dict[str, Any]] = {} + for tombstone in tombstones: + if not isinstance(tombstone, dict): + continue + key = ":".join( + [ + str(tombstone.get("stable_key") or ""), + str(tombstone.get("replacement_scope") or ""), + str(tombstone.get("reason") or ""), + ] + ) + by_key[key] = tombstone + return [by_key[key] for key in sorted(by_key)] + + +def _entity_kind(collection: str) -> str: + return { + "nodes": "node", + "edges": "edge", + "attributes": "attribute", + }[collection] + + +def _as_list(value: object) -> list[object]: + return value if isinstance(value, list) else [] + + +def _unique_strings(values: Iterable[object]) -> list[str]: + seen: set[str] = set() + result: list[str] = [] + for value in values: + text = str(value or "").strip() + if not text or text in seen: + continue + seen.add(text) + result.append(text) + return result + + +def _unique_json(values: Iterable[object]) -> list[object]: + seen: set[str] = set() + result: list[object] = [] + for value in values: + key = json.dumps(value, sort_keys=True, default=str) + if key in seen: + continue + seen.add(key) + result.append(value) + return result + + +def _copy_json(value: Any) -> Any: + return json.loads(json.dumps(value, default=str)) + + +def _utc_now() -> str: + return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z") diff --git a/schemas/discovery-snapshot.schema.yaml b/schemas/discovery-snapshot.schema.yaml index c30be07..b846f63 100644 --- a/schemas/discovery-snapshot.schema.yaml +++ b/schemas/discovery-snapshot.schema.yaml @@ -126,6 +126,35 @@ properties: items: type: object additionalProperties: true + diff: + type: object + additionalProperties: false + required: + - added + - changed + - retired + - conflicted + properties: + added: + type: array + uniqueItems: true + items: + $ref: "#/$defs/stableKey" + changed: + type: array + uniqueItems: true + items: + $ref: "#/$defs/stableKey" + retired: + type: array + uniqueItems: true + items: + $ref: "#/$defs/stableKey" + conflicted: + type: array + uniqueItems: true + items: + $ref: "#/$defs/stableKey" $defs: stableKey: diff --git a/tests/test_reconciliation.py b/tests/test_reconciliation.py new file mode 100644 index 0000000..235de86 --- /dev/null +++ b/tests/test_reconciliation.py @@ -0,0 +1,211 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from railiance_fabric.cli import main as cli_main +from railiance_fabric.discovery import discovery_stable_key, replacement_scope_id, source_fingerprint +from railiance_fabric.reconciliation import reconcile_discovery_snapshots +from railiance_fabric.scanner import ScanOptions, scan_repo +from railiance_fabric.schema_validation import draft202012_validator + + +def test_reconciliation_dedupes_diffs_and_tombstones_by_scope() -> None: + scope_replace = _scope("deterministic", "file", "README.md", "replacement") + scope_additive = _scope("llm-connect-repo-evidence", "llm", "bundle", "additive") + service_key = discovery_stable_key("fixture-repo", "ServiceDeclaration", "fixture.api") + old_key = discovery_stable_key("fixture-repo", "ServiceDeclaration", "old.api") + additive_old_key = discovery_stable_key("fixture-repo", "CapabilityDeclaration", "old-llm") + new_key = discovery_stable_key("fixture-repo", "CapabilityDeclaration", "fixture.ops") + duplicate_key = discovery_stable_key( + "fixture-repo", + "ServiceDeclaration", + "Fixture API", + source_anchor={"path": "README.md", "line_start": 5}, + ) + + previous = _snapshot( + replacement_scopes=[scope_replace, scope_additive], + nodes=[ + _node(service_key, "ServiceDeclaration", "Fixture API", scope_replace["id"], attributes={"owner": "old"}), + _node(old_key, "ServiceDeclaration", "Old API", scope_replace["id"]), + _node(additive_old_key, "CapabilityDeclaration", "Old LLM", scope_additive["id"], origin="llm"), + ], + tombstones=[ + { + "stable_key": discovery_stable_key("fixture-repo", "ServiceDeclaration", "Ancient API"), + "entity_kind": "node", + "replacement_scope": scope_replace["id"], + "retired_at": "2026-05-18T00:00:00Z", + "reason": "source_missing", + } + ], + ) + current = _snapshot( + replacement_scopes=[scope_replace, scope_additive], + nodes=[ + _node(service_key, "ServiceDeclaration", "Fixture API", scope_replace["id"], origin="llm", attributes={"owner": "llm"}), + _node( + service_key, + "ServiceDeclaration", + "Fixture API", + scope_replace["id"], + origin="repo_declaration", + review_state="accepted", + attributes={"owner": "declared"}, + ), + _node(new_key, "CapabilityDeclaration", "Fixture Ops", scope_replace["id"]), + _node(duplicate_key, "ServiceDeclaration", "Fixture API", scope_replace["id"], aliases=["fixture.api.copy"]), + ], + ) + + reconciled = reconcile_discovery_snapshots(previous, current, retired_at="2026-05-19T00:00:00Z") + + _validate_schema("discovery-snapshot.schema.yaml", reconciled) + nodes = {node["stable_key"]: node for node in reconciled["candidates"]["nodes"]} + assert nodes[service_key]["origin"] == "repo_declaration" + assert nodes[service_key]["attributes"]["owner"] == "declared" + assert nodes[service_key]["review_state"] == "needs_review" + assert nodes[duplicate_key]["status"] == "conflicted" + + diff = reconciled["reconciliation"]["diff"] + assert new_key in diff["added"] + assert duplicate_key in diff["added"] + assert service_key in diff["changed"] + assert old_key in diff["retired"] + assert additive_old_key not in diff["retired"] + assert {service_key, duplicate_key} <= set(diff["conflicted"]) + assert any(conflict["type"] == "possible_duplicate_node" for conflict in reconciled["reconciliation"]["conflicts"]) + assert any(tombstone["stable_key"] == old_key for tombstone in reconciled["tombstones"]) + assert any(tombstone["stable_key"].endswith("ancient-api") for tombstone in reconciled["tombstones"]) + + +def test_scan_cli_reconciles_against_previous_snapshot(tmp_path: Path, capsys) -> None: + repo = tmp_path / "fixture-repo" + repo.mkdir() + (repo / "README.md").write_text("# Fixture Repo\n", encoding="utf-8") + previous = scan_repo( + ScanOptions( + repo_path=repo, + repo_slug="fixture-repo", + repo_name="Fixture Repo", + commit="old", + ) + ) + scope_id = next(scope["id"] for scope in previous["replacement_scopes"] if scope["extractor_id"] == "repo-metadata") + vanished_key = discovery_stable_key("fixture-repo", "ServiceDeclaration", "Vanished API") + previous["candidates"]["nodes"].append( + _node(vanished_key, "ServiceDeclaration", "Vanished API", scope_id) + ) + previous_path = tmp_path / "previous.json" + output_path = tmp_path / "current.json" + previous_path.write_text(json.dumps(previous), encoding="utf-8") + + assert cli_main( + [ + "scan", + str(repo), + "--repo-slug", + "fixture-repo", + "--repo-name", + "Fixture Repo", + "--commit", + "new", + "--previous-snapshot", + str(previous_path), + "--output", + str(output_path), + ] + ) == 0 + + summary = capsys.readouterr().out + assert "diff +" in summary + payload = json.loads(output_path.read_text(encoding="utf-8")) + _validate_schema("discovery-snapshot.schema.yaml", payload) + assert vanished_key in payload["reconciliation"]["diff"]["retired"] + assert any(tombstone["stable_key"] == vanished_key for tombstone in payload["tombstones"]) + + +def _snapshot( + *, + replacement_scopes: list[dict[str, object]], + nodes: list[dict[str, object]], + tombstones: list[dict[str, object]] | None = None, +) -> dict[str, object]: + return { + "apiVersion": "railiance.fabric/v1alpha1", + "kind": "FabricDiscoverySnapshot", + "generated_at": "2026-05-19T00:00:00Z", + "source": {"repo_slug": "fixture-repo", "repo_name": "Fixture Repo", "commit": "abc123"}, + "scan": { + "run_id": "scan:fixture-repo:deterministic:abc123", + "profile": "deterministic", + "deterministic_only": True, + "llm_enabled": False, + }, + "replacement_scopes": replacement_scopes, + "candidates": {"nodes": nodes, "edges": [], "attributes": []}, + "tombstones": tombstones or [], + "reconciliation": { + "precedence": ["repo_declaration", "deterministic", "catalog", "registry", "llm", "manual"], + "duplicate_policy": "stable-key matches merge automatically", + "retirement_policy": "missing candidates retire only inside their replacement scope", + }, + } + + +def _scope(extractor_id: str, source_kind: str, source_path: str, mode: str) -> dict[str, object]: + return { + "id": replacement_scope_id("fixture-repo", extractor_id, source_kind, source_path=source_path), + "extractor_id": extractor_id, + "source_kind": source_kind, + "source_path": source_path, + "mode": mode, + } + + +def _node( + stable_key: str, + kind: str, + label: str, + replacement_scope: str, + *, + origin: str = "deterministic", + review_state: str = "candidate", + aliases: list[str] | None = None, + attributes: dict[str, object] | None = None, +) -> dict[str, object]: + anchor = _anchor("file", "README.md") + return { + "stable_key": stable_key, + "kind": kind, + "label": label, + "repo": "fixture-repo", + "aliases": aliases or [label], + "attributes": attributes or {}, + "origin": origin, + "review_state": review_state, + "status": "active", + "confidence": 0.9, + "replacement_scope": replacement_scope, + "provenance": [ + { + "extractor_id": "fixture", + "extractor_version": "0.1.0", + "method": "declaration" if origin == "repo_declaration" else "deterministic" if origin == "deterministic" else "llm", + "origin": origin, + } + ], + "source_anchors": [anchor], + } + + +def _anchor(source_kind: str, path: str) -> dict[str, object]: + anchor = {"source_kind": source_kind, "path": path} + anchor["fingerprint"] = source_fingerprint(anchor) + return anchor + + +def _validate_schema(schema_name: str, payload: dict[str, object]) -> None: + validator = draft202012_validator(Path("schemas") / schema_name) + validator.validate(payload) diff --git a/workplans/RAIL-FAB-WP-0010-repo-reality-scanner.md b/workplans/RAIL-FAB-WP-0010-repo-reality-scanner.md index b193b3f..a2e1230 100644 --- a/workplans/RAIL-FAB-WP-0010-repo-reality-scanner.md +++ b/workplans/RAIL-FAB-WP-0010-repo-reality-scanner.md @@ -199,7 +199,7 @@ Acceptance notes: ```task id: RAIL-FAB-WP-0010-T04 -status: todo +status: done priority: high state_hub_task_id: "f0844595-23e0-4e7a-bfd9-e0526b8f85b9" ```