generated from coulomb/repo-seed
Fix path-scoped duplicate detection
This commit is contained in:
@@ -16,6 +16,8 @@ ORIGIN_PRECEDENCE = {
|
||||
"manual": 5,
|
||||
}
|
||||
|
||||
# Normalized node kinds whose identity is scoped to their source path: two
# nodes of such a kind that share a label are treated as distinct (not as
# possible duplicates) when their source-anchor paths do not overlap.
PATH_SCOPED_NODE_KINDS = {"lockfile"}
|
||||
|
||||
|
||||
def reconcile_discovery_snapshots(
|
||||
previous: dict[str, Any] | None,
|
||||
@@ -159,14 +161,15 @@ def _merge_candidate(
|
||||
|
||||
def _node_conflicts(by_key: dict[str, dict[str, Any]]) -> list[dict[str, object]]:
|
||||
conflicts: list[dict[str, object]] = []
|
||||
seen: dict[tuple[str, str], str] = {}
|
||||
seen: dict[tuple[str, str], list[str]] = {}
|
||||
for key, node in sorted(by_key.items()):
|
||||
kind = str(node.get("kind") or "")
|
||||
labels = _node_identity_labels(node)
|
||||
for label in labels:
|
||||
match_key = (normalize_identity_part(kind), normalize_identity_part(label))
|
||||
other = seen.get(match_key)
|
||||
if other and other != key:
|
||||
for other in seen.get(match_key, []):
|
||||
if other == key or _path_scoped_nodes_are_distinct(by_key.get(other), node):
|
||||
continue
|
||||
conflicts.append(
|
||||
{
|
||||
"type": "possible_duplicate_node",
|
||||
@@ -175,7 +178,7 @@ def _node_conflicts(by_key: dict[str, dict[str, Any]]) -> list[dict[str, object]
|
||||
"match": {"kind": kind, "label": label},
|
||||
}
|
||||
)
|
||||
seen.setdefault(match_key, key)
|
||||
seen.setdefault(match_key, []).append(key)
|
||||
return _unique_conflicts(conflicts)
|
||||
|
||||
|
||||
@@ -282,6 +285,27 @@ def _node_identity_labels(node: dict[str, Any]) -> list[str]:
|
||||
])
|
||||
|
||||
|
||||
def _path_scoped_nodes_are_distinct(left: dict[str, Any] | None, right: dict[str, Any]) -> bool:
    """Report whether two same-kind, path-scoped nodes come from different files.

    Returns ``True`` only when all of the following hold:

    * ``left`` is present and non-empty;
    * both nodes normalize to the same kind, and that kind is listed in
      ``PATH_SCOPED_NODE_KINDS``;
    * each node has at least one source-anchor path, and the two path sets
      share no element.

    In every other case the nodes are not considered distinct and ``False``
    is returned.
    """
    if not left:
        return False

    kind_of_left, kind_of_right = (
        normalize_identity_part(str(node.get("kind") or "")) for node in (left, right)
    )
    same_scoped_kind = kind_of_left == kind_of_right and kind_of_left in PATH_SCOPED_NODE_KINDS
    if not same_scoped_kind:
        return False

    paths_of_left = _source_anchor_paths(left)
    paths_of_right = _source_anchor_paths(right)
    # Without path evidence on both sides we cannot claim the nodes differ.
    if not paths_of_left or not paths_of_right:
        return False
    return paths_of_left.isdisjoint(paths_of_right)
|
||||
|
||||
|
||||
def _source_anchor_paths(candidate: dict[str, Any]) -> set[str]:
|
||||
anchors = candidate.get("source_anchors") if isinstance(candidate.get("source_anchors"), list) else []
|
||||
return {
|
||||
str(anchor.get("path") or "")
|
||||
for anchor in anchors
|
||||
if isinstance(anchor, dict) and anchor.get("path")
|
||||
}
|
||||
|
||||
|
||||
def _candidate_fingerprint(candidate: dict[str, Any]) -> str:
|
||||
ignored = {"provenance"}
|
||||
stable = {
|
||||
|
||||
@@ -80,6 +80,28 @@ def test_reconciliation_dedupes_diffs_and_tombstones_by_scope() -> None:
|
||||
assert any(tombstone["stable_key"].endswith("ancient-api") for tombstone in reconciled["tombstones"])
|
||||
|
||||
|
||||
def test_reconciliation_keeps_distinct_path_scoped_lockfiles_separate() -> None:
    """Two ``uv.lock`` nodes anchored at different paths must not be flagged as duplicates."""
    lockfile_scope = _scope("lockfiles", "file", "var/checkouts", "replacement")
    scope_id = lockfile_scope["id"]
    key_a = "discovery:fixture-repo:lockfile:var-checkouts-a-uv.lock"
    key_b = "discovery:fixture-repo:lockfile:var-checkouts-b-uv.lock"

    # Same kind and label, but each node is anchored to its own checkout path.
    lockfile_nodes = [
        _node(key_a, "Lockfile", "uv.lock", scope_id, source_path="var/checkouts/a/uv.lock"),
        _node(key_b, "Lockfile", "uv.lock", scope_id, source_path="var/checkouts/b/uv.lock"),
    ]
    snapshot = _snapshot(replacement_scopes=[lockfile_scope], nodes=lockfile_nodes)

    result = reconcile_discovery_snapshots(None, snapshot, retired_at="2026-05-19T00:00:00Z")

    report = result["reconciliation"]
    assert report["conflicts"] == []
    assert report["diff"]["conflicted"] == []

    by_key = {entry["stable_key"]: entry for entry in result["candidates"]["nodes"]}
    for key in (key_a, key_b):
        assert by_key[key]["review_state"] == "candidate"
|
||||
|
||||
|
||||
def test_scan_cli_reconciles_against_previous_snapshot(tmp_path: Path, capsys) -> None:
|
||||
repo = tmp_path / "fixture-repo"
|
||||
repo.mkdir()
|
||||
@@ -231,8 +253,9 @@ def _node(
|
||||
review_state: str = "candidate",
|
||||
aliases: list[str] | None = None,
|
||||
attributes: dict[str, object] | None = None,
|
||||
source_path: str = "README.md",
|
||||
) -> dict[str, object]:
|
||||
anchor = _anchor("file", "README.md")
|
||||
anchor = _anchor("file", source_path)
|
||||
return {
|
||||
"stable_key": stable_key,
|
||||
"kind": kind,
|
||||
|
||||
@@ -52,7 +52,7 @@ path-scoped evidence.
|
||||
|
||||
```task
|
||||
id: RAIL-FAB-WP-0013-T01
|
||||
status: todo
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "8d9cb1c2-77f7-45da-b942-a8d68454477a"
|
||||
```
|
||||
@@ -64,7 +64,7 @@ not raise `possible_duplicate_node`, while same-label semantic nodes still do.
|
||||
|
||||
```task
|
||||
id: RAIL-FAB-WP-0013-T02
|
||||
status: todo
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "7d4d439d-4f7f-4bdd-bc5f-b02524a10684"
|
||||
```
|
||||
@@ -76,7 +76,7 @@ source anchors before emitting duplicate conflicts.
|
||||
|
||||
```task
|
||||
id: RAIL-FAB-WP-0013-T03
|
||||
status: todo
|
||||
status: in_progress
|
||||
priority: high
|
||||
state_hub_task_id: "0d6cad1d-16f8-4ac7-b8d3-b62a2f8e4549"
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user