chore(consistency): sync task status from DB [auto]

Updated by fix-consistency on 2026-05-15:
  - update .custodian-brief.md for repo-scoping
This commit is contained in:
2026-05-15 21:14:21 +02:00
parent f38ed6847c
commit 084159e51c
42 changed files with 5 additions and 5 deletions

View File

@@ -0,0 +1 @@
"""Candidate ability graph generation."""

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,273 @@
from __future__ import annotations
import re
from dataclasses import replace
from repo_registry.candidate_graph.generator import (
CandidateAbilityDraft,
CandidateCapabilityDraft,
CandidateEvidenceDraft,
CandidateFeatureDraft,
)
from repo_registry.core.models import SourceReference
# Words that `_tokens` discards when tokenising a name. Membership is tested on
# the raw lower-cased word *before* stemming, so an inflected form is only
# filtered if it is listed here explicitly (e.g. both "model" and "models").
STOP_WORDS = {
"a",
"an",
"and",
"capability",
"feature",
"for",
"models",
"model",
"of",
"support",
"supports",
"the",
"to",
"use",
"uses",
"using",
}
# Tokens that `_names_overlap` treats as strong identifiers: when two names
# share one of these and the smaller token set is at least 80% contained in
# the other, the names are considered overlapping.
# NOTE(review): these look like provider / model-family names — confirm.
DISTINCTIVE_TOKENS = {
"anthropic",
"claude",
"gemini",
"openai",
"openrouter",
}
def normalize_candidate_drafts(
    abilities: list[CandidateAbilityDraft],
) -> list[CandidateAbilityDraft]:
    """Return ``abilities`` with overlapping drafts merged.

    Public entry point of this module; all work is delegated to
    ``_merge_abilities``.
    """
    normalized = _merge_abilities(abilities)
    return normalized
def _merge_abilities(
abilities: list[CandidateAbilityDraft],
) -> list[CandidateAbilityDraft]:
merged: list[CandidateAbilityDraft] = []
for ability in abilities:
index = _find_overlap(merged, ability.name)
if index is None:
merged.append(
replace(
ability,
capabilities=_merge_capabilities(ability.capabilities),
)
)
continue
merged[index] = _combine_abilities(merged[index], ability)
return merged
def _combine_abilities(
    left: CandidateAbilityDraft,
    right: CandidateAbilityDraft,
) -> CandidateAbilityDraft:
    """Build one ability draft out of two overlapping ones.

    Text fields take the preferred value of the pair, confidence takes the
    higher value, and list fields are unioned with duplicates removed.
    Capabilities from both sides are concatenated and merged again.
    """
    name = _preferred_name(left.name, right.name)
    description = _preferred_description(left.description, right.description)
    confidence = max(left.confidence, right.confidence)
    source_refs = _merge_source_refs(left.source_refs, right.source_refs)
    primary_class = _preferred_text(left.primary_class, right.primary_class)
    attributes = _merge_strings(left.attributes, right.attributes)
    capabilities = _merge_capabilities(left.capabilities + right.capabilities)
    return CandidateAbilityDraft(
        name=name,
        description=description,
        confidence=confidence,
        source_refs=source_refs,
        primary_class=primary_class,
        attributes=attributes,
        capabilities=capabilities,
    )
def _merge_capabilities(
capabilities: list[CandidateCapabilityDraft],
) -> list[CandidateCapabilityDraft]:
merged: list[CandidateCapabilityDraft] = []
for capability in capabilities:
index = _find_overlap(merged, capability.name)
if index is None:
merged.append(
replace(
capability,
features=_merge_features(capability.features),
evidence=_merge_evidence(capability.evidence),
)
)
continue
merged[index] = _combine_capabilities(merged[index], capability)
return merged
def _combine_capabilities(
    left: CandidateCapabilityDraft,
    right: CandidateCapabilityDraft,
) -> CandidateCapabilityDraft:
    """Build one capability draft out of two overlapping ones.

    Text fields take the preferred value of the pair, confidence takes the
    higher value, and string/source-ref lists are unioned without duplicates.
    Features and evidence from both sides are concatenated and merged again.
    """
    name = _preferred_name(left.name, right.name)
    description = _preferred_description(left.description, right.description)
    inputs = _merge_strings(left.inputs, right.inputs)
    outputs = _merge_strings(left.outputs, right.outputs)
    confidence = max(left.confidence, right.confidence)
    source_refs = _merge_source_refs(left.source_refs, right.source_refs)
    primary_class = _preferred_text(left.primary_class, right.primary_class)
    attributes = _merge_strings(left.attributes, right.attributes)
    features = _merge_features(left.features + right.features)
    evidence = _merge_evidence(left.evidence + right.evidence)
    return CandidateCapabilityDraft(
        name=name,
        description=description,
        inputs=inputs,
        outputs=outputs,
        confidence=confidence,
        source_refs=source_refs,
        primary_class=primary_class,
        attributes=attributes,
        features=features,
        evidence=evidence,
    )
def _merge_features(
features: list[CandidateFeatureDraft],
) -> list[CandidateFeatureDraft]:
merged: list[CandidateFeatureDraft] = []
for feature in features:
index = _find_overlap(merged, feature.name)
if index is None:
merged.append(feature)
continue
existing = merged[index]
merged[index] = CandidateFeatureDraft(
name=_preferred_name(existing.name, feature.name),
type=_preferred_text(existing.type, feature.type),
location=_preferred_text(existing.location, feature.location),
confidence=max(existing.confidence, feature.confidence),
source_refs=_merge_source_refs(existing.source_refs, feature.source_refs),
primary_class=_preferred_text(existing.primary_class, feature.primary_class),
attributes=_merge_strings(existing.attributes, feature.attributes),
)
return merged
def _merge_evidence(
evidence_items: list[CandidateEvidenceDraft],
) -> list[CandidateEvidenceDraft]:
merged: list[CandidateEvidenceDraft] = []
seen: set[tuple[str, str]] = set()
for evidence in evidence_items:
key = (_normalize_text(evidence.type), _normalize_path(evidence.reference))
if key not in seen:
seen.add(key)
merged.append(evidence)
continue
index = next(
index
for index, item in enumerate(merged)
if (_normalize_text(item.type), _normalize_path(item.reference)) == key
)
existing = merged[index]
merged[index] = CandidateEvidenceDraft(
type=_preferred_text(existing.type, evidence.type),
reference=_preferred_text(existing.reference, evidence.reference),
strength=_stronger_evidence(existing.strength, evidence.strength),
source_refs=_merge_source_refs(existing.source_refs, evidence.source_refs),
)
return merged
def _find_overlap(items: list, name: str) -> int | None:
for index, item in enumerate(items):
if _names_overlap(item.name, name):
return index
return None
def _names_overlap(left: str, right: str) -> bool:
    """Decide whether two names refer to the same thing.

    Names are compared as sets of stemmed, stop-word-free tokens:

    * if exactly one name has no tokens, they do not overlap;
    * if neither has tokens (empty or made only of stop words), they overlap
      only when their unfiltered stemmed words are equal — previously any two
      such names were treated as equal because both normalised to ``""``;
    * otherwise they overlap when they share a distinctive token and the
      smaller set is at least 80% contained in the other, or the Jaccard
      similarity is at least 0.6, or containment is at least 80% with two or
      more shared tokens.

    Identical token sets need no special case: their Jaccard similarity is 1.
    """
    left_tokens = _tokens(left)
    right_tokens = _tokens(right)
    if not left_tokens and not right_tokens:
        # Nothing survived stop-word filtering on either side, so fall back to
        # comparing the raw stemmed words ("Model" ~ "Models", but
        # "Model support" is not "Feature use").
        left_words = {_stem(word) for word in re.findall(r"[a-z0-9]+", left.lower())}
        right_words = {_stem(word) for word in re.findall(r"[a-z0-9]+", right.lower())}
        return left_words == right_words
    if not left_tokens or not right_tokens:
        return False
    shared = left_tokens & right_tokens
    jaccard = len(shared) / len(left_tokens | right_tokens)
    containment = len(shared) / min(len(left_tokens), len(right_tokens))
    if shared & DISTINCTIVE_TOKENS and containment >= 0.8:
        return True
    return jaccard >= 0.6 or (containment >= 0.8 and len(shared) >= 2)
def _tokens(value: str) -> set[str]:
return {
_stem(token)
for token in re.findall(r"[a-z0-9]+", value.lower())
if token not in STOP_WORDS
}
def _stem(token: str) -> str:
if token.endswith("ies") and len(token) > 4:
return f"{token[:-3]}y"
if token.endswith("s") and len(token) > 3:
return token[:-1]
return token
def _normalize_text(value: str) -> str:
    """Return a canonical key for ``value``: its tokens, sorted and space-joined.

    Word order, case, punctuation and stop words do not affect the result,
    because it is built from the set returned by ``_tokens``.
    """
    ordered_tokens = sorted(_tokens(value))
    return " ".join(ordered_tokens)
def _normalize_path(value: str) -> str:
return value.strip().lower()
def _preferred_name(left: str, right: str) -> str:
    """Pick which of two names to keep; same rule as ``_preferred_text``."""
    chosen = _preferred_text(left, right)
    return chosen
def _preferred_description(left: str, right: str) -> str:
if not left.strip():
return right.strip()
if not right.strip():
return left.strip()
if _normalize_sentence(left) == _normalize_sentence(right):
return max((left.strip(), right.strip()), key=len)
return max((left.strip(), right.strip()), key=len)
def _normalize_sentence(value: str) -> str:
return re.sub(r"\s+", " ", value.strip().lower())
def _preferred_text(left: str, right: str) -> str:
if not left:
return right
if not right:
return left
return max((left, right), key=lambda item: (len(_tokens(item)), len(item)))
def _merge_strings(left: list[str], right: list[str]) -> list[str]:
merged: list[str] = []
seen: set[str] = set()
for value in left + right:
key = _normalize_value(value)
if key in seen:
continue
seen.add(key)
merged.append(value)
return merged
def _normalize_value(value: str) -> str:
return " ".join(re.findall(r"[a-z0-9]+", value.lower()))
def _merge_source_refs(
left: list[SourceReference],
right: list[SourceReference],
) -> list[SourceReference]:
merged: list[SourceReference] = []
seen: set[tuple[int | None, str, str, str, int | None]] = set()
for ref in left + right:
key = (ref.fact_id, ref.path, ref.kind, ref.name, ref.line)
if key in seen:
continue
seen.add(key)
merged.append(ref)
return merged
def _stronger_evidence(left: str, right: str) -> str:
ranking = {"weak": 0, "medium": 1, "strong": 2}
return left if ranking.get(left, 1) >= ranking.get(right, 1) else right