Fix path-scoped duplicate detection

This commit is contained in:
2026-05-20 23:42:21 +02:00
parent 56b73dbab0
commit 5e0ffea585
3 changed files with 55 additions and 8 deletions

View File

@@ -16,6 +16,8 @@ ORIGIN_PRECEDENCE = {
"manual": 5,
}
# Node kinds whose identity is scoped to their source path. Membership is
# tested against the kind after it has been passed through
# normalize_identity_part(). Two nodes of such a kind that share a label are
# not reported as possible duplicates when their source-anchor paths are
# disjoint.
PATH_SCOPED_NODE_KINDS = {"lockfile"}
def reconcile_discovery_snapshots(
previous: dict[str, Any] | None,
@@ -159,14 +161,15 @@ def _merge_candidate(
def _node_conflicts(by_key: dict[str, dict[str, Any]]) -> list[dict[str, object]]:
conflicts: list[dict[str, object]] = []
seen: dict[tuple[str, str], str] = {}
seen: dict[tuple[str, str], list[str]] = {}
for key, node in sorted(by_key.items()):
kind = str(node.get("kind") or "")
labels = _node_identity_labels(node)
for label in labels:
match_key = (normalize_identity_part(kind), normalize_identity_part(label))
other = seen.get(match_key)
if other and other != key:
for other in seen.get(match_key, []):
if other == key or _path_scoped_nodes_are_distinct(by_key.get(other), node):
continue
conflicts.append(
{
"type": "possible_duplicate_node",
@@ -175,7 +178,7 @@ def _node_conflicts(by_key: dict[str, dict[str, Any]]) -> list[dict[str, object]
"match": {"kind": kind, "label": label},
}
)
seen.setdefault(match_key, key)
seen.setdefault(match_key, []).append(key)
return _unique_conflicts(conflicts)
@@ -282,6 +285,27 @@ def _node_identity_labels(node: dict[str, Any]) -> list[str]:
])
def _path_scoped_nodes_are_distinct(left: dict[str, Any] | None, right: dict[str, Any]) -> bool:
    """Tell whether two nodes are separate artifacts because they live at different paths.

    Returns ``True`` only when all of the following hold:

    * ``left`` is a non-empty mapping,
    * both nodes normalize to the same kind and that kind is listed in
      ``PATH_SCOPED_NODE_KINDS``,
    * both nodes expose at least one source-anchor path, and
    * the two path sets have no element in common.

    In every other case the nodes are not considered provably distinct and
    ``False`` is returned.
    """
    if not left:
        return False

    kind_of_left = normalize_identity_part(str(left.get("kind") or ""))
    kind_of_right = normalize_identity_part(str(right.get("kind") or ""))
    shares_scoped_kind = kind_of_left == kind_of_right and kind_of_left in PATH_SCOPED_NODE_KINDS
    if not shares_scoped_kind:
        return False

    paths_of_left = _source_anchor_paths(left)
    paths_of_right = _source_anchor_paths(right)
    # Without path evidence on both sides there is nothing to tell the nodes apart.
    if not paths_of_left or not paths_of_right:
        return False
    return paths_of_left.isdisjoint(paths_of_right)
def _source_anchor_paths(candidate: dict[str, Any]) -> set[str]:
    """Collect the distinct, non-empty ``path`` values of a candidate's source anchors.

    ``candidate["source_anchors"]`` is only consulted when it is a list; any
    other value (or a missing key) yields an empty set. List entries that are
    not dicts, or whose ``path`` is missing or falsy, are ignored. Each
    remaining path is converted with ``str()``.
    """
    raw_anchors = candidate.get("source_anchors")
    if not isinstance(raw_anchors, list):
        return set()

    collected: set[str] = set()
    for entry in raw_anchors:
        if not isinstance(entry, dict):
            continue
        path = entry.get("path")
        if path:
            collected.add(str(path))
    return collected
def _candidate_fingerprint(candidate: dict[str, Any]) -> str:
ignored = {"provenance"}
stable = {

View File

@@ -80,6 +80,28 @@ def test_reconciliation_dedupes_diffs_and_tombstones_by_scope() -> None:
assert any(tombstone["stable_key"].endswith("ancient-api") for tombstone in reconciled["tombstones"])
def test_reconciliation_keeps_distinct_path_scoped_lockfiles_separate() -> None:
    """Same-kind, same-label lockfiles anchored at different paths must not conflict."""
    scope = _scope("lockfiles", "file", "var/checkouts", "replacement")
    # stable key -> source path; both nodes share kind "Lockfile" and label "uv.lock".
    lockfile_paths = {
        "discovery:fixture-repo:lockfile:var-checkouts-a-uv.lock": "var/checkouts/a/uv.lock",
        "discovery:fixture-repo:lockfile:var-checkouts-b-uv.lock": "var/checkouts/b/uv.lock",
    }
    current = _snapshot(
        replacement_scopes=[scope],
        nodes=[
            _node(stable_key, "Lockfile", "uv.lock", scope["id"], source_path=path)
            for stable_key, path in lockfile_paths.items()
        ],
    )

    reconciled = reconcile_discovery_snapshots(None, current, retired_at="2026-05-19T00:00:00Z")

    reconciliation = reconciled["reconciliation"]
    assert reconciliation["conflicts"] == []
    assert reconciliation["diff"]["conflicted"] == []
    nodes_by_key = {node["stable_key"]: node for node in reconciled["candidates"]["nodes"]}
    for stable_key in lockfile_paths:
        assert nodes_by_key[stable_key]["review_state"] == "candidate"
def test_scan_cli_reconciles_against_previous_snapshot(tmp_path: Path, capsys) -> None:
repo = tmp_path / "fixture-repo"
repo.mkdir()
@@ -231,8 +253,9 @@ def _node(
review_state: str = "candidate",
aliases: list[str] | None = None,
attributes: dict[str, object] | None = None,
source_path: str = "README.md",
) -> dict[str, object]:
anchor = _anchor("file", "README.md")
anchor = _anchor("file", source_path)
return {
"stable_key": stable_key,
"kind": kind,

View File

@@ -52,7 +52,7 @@ path-scoped evidence.
```task
id: RAIL-FAB-WP-0013-T01
status: todo
status: done
priority: high
state_hub_task_id: "8d9cb1c2-77f7-45da-b942-a8d68454477a"
```
@@ -64,7 +64,7 @@ not raise `possible_duplicate_node`, while same-label semantic nodes still do.
```task
id: RAIL-FAB-WP-0013-T02
status: todo
status: done
priority: high
state_hub_task_id: "7d4d439d-4f7f-4bdd-bc5f-b02524a10684"
```
@@ -76,7 +76,7 @@ source anchors before emitting duplicate conflicts.
```task
id: RAIL-FAB-WP-0013-T03
status: todo
status: in_progress
priority: high
state_hub_task_id: "0d6cad1d-16f8-4ac7-b8d3-b62a2f8e4549"
```