Added merge-duplicates slice and did a first polish

This commit is contained in:
2026-04-25 23:50:58 +02:00
parent 1d6d103bc2
commit 19d34efa37
6 changed files with 788 additions and 1 deletions

View File

@@ -643,6 +643,132 @@ class RegistryStore:
target_capability_id=target_capability_id,
)
def merge_candidate_ability(
    self,
    repository_id: int,
    analysis_run_id: int,
    source_ability_id: int,
    target_ability_id: int,
) -> None:
    """Fold the source candidate ability into the target one.

    Scoped to the given repository and analysis run: the source row in
    ``candidate_abilities`` is switched to status ``'merged'`` and every
    ``candidate_capabilities`` row whose ``ability_id`` is the source is
    re-pointed at the target.

    Raises:
        ValueError: source and target ids are equal.
        NotFoundError: no source row with status ``'candidate'`` matched
            the id, repository and analysis run.
    """
    if source_ability_id == target_ability_id:
        raise ValueError("source and target candidate ability must be different")

    # Target validation is delegated to the shared helper and runs before
    # any row is modified.
    self._ensure_candidate_row(
        candidate_id=target_ability_id,
        repository_id=repository_id,
        analysis_run_id=analysis_run_id,
        table="candidate_abilities",
        label="target candidate ability",
    )

    flag_source_sql = """
        UPDATE candidate_abilities
        SET status = 'merged'
        WHERE id = ?
        AND repository_id = ?
        AND analysis_run_id = ?
        AND status = 'candidate'
        """
    repoint_capabilities_sql = """
        UPDATE candidate_capabilities
        SET ability_id = ?
        WHERE ability_id = ? AND repository_id = ? AND analysis_run_id = ?
        """
    run_scope = (repository_id, analysis_run_id)

    with self.connect() as db:
        flagged = db.execute(flag_source_sql, (source_ability_id, *run_scope))
        # Zero affected rows: the source is absent from this scope or is no
        # longer in status 'candidate'.
        if flagged.rowcount == 0:
            raise NotFoundError(
                "source candidate ability "
                f"{source_ability_id} was not found for repository "
                f"{repository_id} analysis run {analysis_run_id}"
            )
        db.execute(
            repoint_capabilities_sql,
            (target_ability_id, source_ability_id, *run_scope),
        )
def merge_candidate_capability(
    self,
    repository_id: int,
    analysis_run_id: int,
    source_capability_id: int,
    target_capability_id: int,
) -> None:
    """Fold the source candidate capability into the target one.

    Scoped to the given repository and analysis run: the source row in
    ``candidate_capabilities`` is switched to status ``'merged'``, then rows
    in ``candidate_features`` and ``candidate_evidence`` whose
    ``capability_id`` is the source are re-pointed at the target.

    Raises:
        ValueError: source and target ids are equal.
        NotFoundError: no source row with status ``'candidate'`` matched
            the id, repository and analysis run.
    """
    if source_capability_id == target_capability_id:
        raise ValueError("source and target candidate capability must be different")

    # Target validation is delegated to the shared helper and runs before
    # any row is modified.
    self._ensure_candidate_row(
        candidate_id=target_capability_id,
        repository_id=repository_id,
        analysis_run_id=analysis_run_id,
        table="candidate_capabilities",
        label="target candidate capability",
    )

    flag_source_sql = """
        UPDATE candidate_capabilities
        SET status = 'merged'
        WHERE id = ?
        AND repository_id = ?
        AND analysis_run_id = ?
        AND status = 'candidate'
        """
    # The same parameter tuple serves every child-table update below.
    repoint_params = (
        target_capability_id,
        source_capability_id,
        repository_id,
        analysis_run_id,
    )

    with self.connect() as db:
        flagged = db.execute(
            flag_source_sql,
            (source_capability_id, repository_id, analysis_run_id),
        )
        # Zero affected rows: the source is absent from this scope or is no
        # longer in status 'candidate'.
        if flagged.rowcount == 0:
            raise NotFoundError(
                "source candidate capability "
                f"{source_capability_id} was not found for repository "
                f"{repository_id} analysis run {analysis_run_id}"
            )
        # Table names come from this fixed tuple only, so interpolating
        # them into the statement is safe.
        for child_table in ("candidate_features", "candidate_evidence"):
            db.execute(
                f"""
                UPDATE {child_table}
                SET capability_id = ?
                WHERE capability_id = ?
                AND repository_id = ?
                AND analysis_run_id = ?
                """,
                repoint_params,
            )
def merge_candidate_feature(
    self,
    repository_id: int,
    analysis_run_id: int,
    source_feature_id: int,
    target_feature_id: int,
) -> None:
    """Merge one candidate feature into another.

    Delegates to ``_merge_candidate_leaf`` with the ``candidate_features``
    table; all validation and error reporting happens there.
    """
    self._merge_candidate_leaf(
        source_id=source_feature_id,
        target_id=target_feature_id,
        repository_id=repository_id,
        analysis_run_id=analysis_run_id,
        table="candidate_features",
        label="candidate feature",
    )
def merge_candidate_evidence(
    self,
    repository_id: int,
    analysis_run_id: int,
    source_evidence_id: int,
    target_evidence_id: int,
) -> None:
    """Merge one candidate evidence row into another.

    Delegates to ``_merge_candidate_leaf`` with the ``candidate_evidence``
    table; all validation and error reporting happens there.
    """
    self._merge_candidate_leaf(
        source_id=source_evidence_id,
        target_id=target_evidence_id,
        repository_id=repository_id,
        analysis_run_id=analysis_run_id,
        table="candidate_evidence",
        label="candidate evidence",
    )
def _ensure_candidate_row(
self,
*,
@@ -703,6 +829,43 @@ class RegistryStore:
f"{repository_id} analysis run {analysis_run_id}"
)
def _merge_candidate_leaf(
    self,
    *,
    table: str,
    label: str,
    repository_id: int,
    analysis_run_id: int,
    source_id: int,
    target_id: int,
) -> None:
    """Mark a candidate row as merged into a sibling row of the same table.

    Only the source row's status changes (to ``'merged'``); no other rows
    are re-pointed here.

    Args:
        table: Name of the candidate table; interpolated directly into the
            SQL text, so it must be a trusted identifier.
        label: Human-readable noun used in error messages.
        repository_id: Repository the rows must belong to.
        analysis_run_id: Analysis run the rows must belong to.
        source_id: Row being merged away.
        target_id: Row being merged into.

    Raises:
        ValueError: source and target ids are equal.
        NotFoundError: no source row with status ``'candidate'`` matched
            the id, repository and analysis run.
    """
    if source_id == target_id:
        raise ValueError(f"source and target {label} must be different")

    # Target validation is delegated to the shared helper and runs before
    # the source row is touched.
    self._ensure_candidate_row(
        candidate_id=target_id,
        repository_id=repository_id,
        analysis_run_id=analysis_run_id,
        table=table,
        label=f"target {label}",
    )

    flag_source_sql = f"""
        UPDATE {table}
        SET status = 'merged'
        WHERE id = ?
        AND repository_id = ?
        AND analysis_run_id = ?
        AND status = 'candidate'
        """
    with self.connect() as db:
        flagged = db.execute(
            flag_source_sql,
            (source_id, repository_id, analysis_run_id),
        )
        # Zero affected rows: the source is absent from this scope or is no
        # longer in status 'candidate'.
        if flagged.rowcount == 0:
            raise NotFoundError(
                f"source {label} {source_id} was not found for repository "
                f"{repository_id} analysis run {analysis_run_id}"
            )
def _reject_candidate_leaf(
self,
*,