generated from coulomb/repo-seed
Improved datamodel and deterministic generation
This commit is contained in:
@@ -21,6 +21,8 @@ class CandidateFeatureDraft:
|
||||
location: str
|
||||
confidence: float
|
||||
source_refs: list[SourceReference]
|
||||
primary_class: str = ""
|
||||
attributes: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -31,6 +33,8 @@ class CandidateCapabilityDraft:
|
||||
outputs: list[str]
|
||||
confidence: float
|
||||
source_refs: list[SourceReference]
|
||||
primary_class: str = "capability"
|
||||
attributes: list[str] = field(default_factory=list)
|
||||
features: list[CandidateFeatureDraft] = field(default_factory=list)
|
||||
evidence: list[CandidateEvidenceDraft] = field(default_factory=list)
|
||||
|
||||
@@ -41,6 +45,8 @@ class CandidateAbilityDraft:
|
||||
description: str
|
||||
confidence: float
|
||||
source_refs: list[SourceReference]
|
||||
primary_class: str = "ability"
|
||||
attributes: list[str] = field(default_factory=list)
|
||||
capabilities: list[CandidateCapabilityDraft] = field(default_factory=list)
|
||||
|
||||
|
||||
@@ -68,6 +74,11 @@ class CandidateGraphGenerator:
|
||||
credential_configs = self._facts(facts, "credential_config")
|
||||
provider_registries = self._facts(facts, "provider_registry")
|
||||
fallback_policies = self._facts(facts, "fallback_policy")
|
||||
ability_primary_class, ability_attributes = self._ability_classification(
|
||||
repository,
|
||||
facts,
|
||||
chunks,
|
||||
)
|
||||
|
||||
ability_sources = docs or manifests or languages
|
||||
ability = CandidateAbilityDraft(
|
||||
@@ -82,6 +93,8 @@ class CandidateGraphGenerator:
|
||||
languages=languages,
|
||||
),
|
||||
source_refs=self._source_refs(ability_sources),
|
||||
primary_class=ability_primary_class,
|
||||
attributes=ability_attributes,
|
||||
capabilities=[],
|
||||
)
|
||||
|
||||
@@ -119,6 +132,12 @@ class CandidateGraphGenerator:
|
||||
docs=docs,
|
||||
),
|
||||
source_refs=self._source_refs(manifests + frameworks + languages),
|
||||
primary_class="repository-structure",
|
||||
attributes=self._structure_attributes(
|
||||
manifests,
|
||||
frameworks,
|
||||
languages,
|
||||
),
|
||||
evidence=self._evidence(tests, examples, docs),
|
||||
)
|
||||
)
|
||||
@@ -129,6 +148,8 @@ class CandidateGraphGenerator:
|
||||
description=ability.description,
|
||||
confidence=ability.confidence,
|
||||
source_refs=ability.source_refs,
|
||||
primary_class=ability.primary_class,
|
||||
attributes=ability.attributes,
|
||||
capabilities=capabilities,
|
||||
)
|
||||
]
|
||||
@@ -154,6 +175,8 @@ class CandidateGraphGenerator:
|
||||
docs=docs,
|
||||
),
|
||||
source_refs=self._source_refs(interfaces),
|
||||
primary_class="interface",
|
||||
attributes=self._interface_attributes(interfaces),
|
||||
features=features,
|
||||
evidence=self._evidence(tests, examples, docs),
|
||||
)
|
||||
@@ -181,6 +204,8 @@ class CandidateGraphGenerator:
|
||||
source_refs=self._source_refs(
|
||||
[fact for fact in providers if fact.name == provider]
|
||||
),
|
||||
primary_class="integration",
|
||||
attributes=["llm-provider", provider.lower()],
|
||||
)
|
||||
for provider in provider_names
|
||||
]
|
||||
@@ -192,6 +217,8 @@ class CandidateGraphGenerator:
|
||||
location=self._grouped_location(credentials),
|
||||
confidence=0.7,
|
||||
source_refs=self._source_refs(credentials),
|
||||
primary_class="configuration",
|
||||
attributes=["credential", "llm-provider"],
|
||||
)
|
||||
)
|
||||
if registries:
|
||||
@@ -202,6 +229,8 @@ class CandidateGraphGenerator:
|
||||
location=self._grouped_location(registries),
|
||||
confidence=0.65,
|
||||
source_refs=self._source_refs(registries),
|
||||
primary_class="backend",
|
||||
attributes=["provider-registry", "llm-provider"],
|
||||
)
|
||||
)
|
||||
if fallback_policies:
|
||||
@@ -212,6 +241,8 @@ class CandidateGraphGenerator:
|
||||
location=self._grouped_location(fallback_policies),
|
||||
confidence=0.6,
|
||||
source_refs=self._source_refs(fallback_policies),
|
||||
primary_class="backend",
|
||||
attributes=["fallback-policy", "llm-provider"],
|
||||
)
|
||||
)
|
||||
return CandidateCapabilityDraft(
|
||||
@@ -232,6 +263,13 @@ class CandidateGraphGenerator:
|
||||
source_refs=self._source_refs(
|
||||
providers + credentials + registries + fallback_policies
|
||||
),
|
||||
primary_class="llm-integration",
|
||||
attributes=self._llm_provider_attributes(
|
||||
providers,
|
||||
credentials,
|
||||
registries,
|
||||
fallback_policies,
|
||||
),
|
||||
features=features,
|
||||
evidence=self._evidence(tests, examples, docs),
|
||||
)
|
||||
@@ -256,6 +294,8 @@ class CandidateGraphGenerator:
|
||||
location=fact.path,
|
||||
confidence=0.65 if fact.value else 0.45,
|
||||
source_refs=self._source_refs([fact]),
|
||||
primary_class=feature_type,
|
||||
attributes=self._feature_attributes(feature_type, [fact]),
|
||||
)
|
||||
)
|
||||
continue
|
||||
@@ -271,6 +311,8 @@ class CandidateGraphGenerator:
|
||||
location=self._grouped_location(facts),
|
||||
confidence=self._grouped_interface_confidence(facts),
|
||||
source_refs=self._source_refs(facts),
|
||||
primary_class=feature_type,
|
||||
attributes=self._feature_attributes(feature_type, facts),
|
||||
)
|
||||
)
|
||||
return features
|
||||
@@ -357,6 +399,96 @@ class CandidateGraphGenerator:
|
||||
return "API"
|
||||
return "interface"
|
||||
|
||||
def _ability_classification(
|
||||
self,
|
||||
repository: Repository,
|
||||
facts: list[ObservedFact],
|
||||
chunks: list[ContentChunk],
|
||||
) -> tuple[str, list[str]]:
|
||||
text = " ".join(
|
||||
[
|
||||
repository.name,
|
||||
repository.description or "",
|
||||
" ".join(chunk.text[:600] for chunk in chunks if chunk.kind == "documentation"),
|
||||
" ".join(f"{fact.kind} {fact.name} {fact.value}" for fact in facts),
|
||||
]
|
||||
).lower()
|
||||
attributes: list[str] = []
|
||||
if any(token in text for token in ("repository", "repo", "registry")):
|
||||
attributes.append("repository")
|
||||
if any(token in text for token in ("ability", "capability", "feature")):
|
||||
return "repository-intelligence", self._unique(attributes + ["capability-mapping"])
|
||||
if any(token in text for token in ("llm", "openrouter", "claude", "model provider")):
|
||||
return "ai-integration", self._unique(attributes + ["llm-provider"])
|
||||
if any(fact.kind == "interface" for fact in facts):
|
||||
attributes.append("interface")
|
||||
return "developer-tooling", self._unique(attributes)
|
||||
|
||||
def _interface_attributes(self, interfaces: list[ObservedFact]) -> list[str]:
|
||||
feature_types = {self._feature_type(fact) for fact in interfaces}
|
||||
attributes = ["api" if item == "API" else "cli" if item == "CLI" else "callable" for item in feature_types]
|
||||
return self._unique(["surface", *attributes])
|
||||
|
||||
def _feature_attributes(
|
||||
self,
|
||||
feature_type: str,
|
||||
facts: list[ObservedFact],
|
||||
) -> list[str]:
|
||||
attributes = [feature_type]
|
||||
if feature_type == "API":
|
||||
attributes.extend(["surface", "http"])
|
||||
elif feature_type == "CLI":
|
||||
attributes.extend(["surface", "command"])
|
||||
else:
|
||||
attributes.append("surface")
|
||||
paths = " ".join(fact.path.lower() for fact in facts)
|
||||
if "test" in paths:
|
||||
attributes.append("test-linked")
|
||||
return self._unique(attributes)
|
||||
|
||||
def _structure_attributes(
|
||||
self,
|
||||
manifests: list[ObservedFact],
|
||||
frameworks: list[ObservedFact],
|
||||
languages: list[ObservedFact],
|
||||
) -> list[str]:
|
||||
return self._unique(
|
||||
[
|
||||
"manifest" if manifests else "",
|
||||
*[fact.name for fact in frameworks],
|
||||
*[fact.name for fact in languages],
|
||||
]
|
||||
)
|
||||
|
||||
def _llm_provider_attributes(
|
||||
self,
|
||||
providers: list[ObservedFact],
|
||||
credentials: list[ObservedFact],
|
||||
registries: list[ObservedFact],
|
||||
fallback_policies: list[ObservedFact],
|
||||
) -> list[str]:
|
||||
return self._unique(
|
||||
[
|
||||
"llm-provider",
|
||||
*[fact.name.lower() for fact in providers],
|
||||
"credential" if credentials else "",
|
||||
"provider-registry" if registries else "",
|
||||
"fallback-policy" if fallback_policies else "",
|
||||
]
|
||||
)
|
||||
|
||||
def _unique(self, values: list[str]) -> list[str]:
|
||||
result: list[str] = []
|
||||
seen: set[str] = set()
|
||||
for value in values:
|
||||
item = value.strip()
|
||||
key = item.lower()
|
||||
if not item or key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
result.append(item)
|
||||
return result
|
||||
|
||||
def _interface_inputs(self, interfaces: list[ObservedFact]) -> list[str]:
|
||||
feature_types = {self._feature_type(fact) for fact in interfaces}
|
||||
inputs: list[str] = []
|
||||
|
||||
@@ -73,6 +73,8 @@ def _combine_abilities(
|
||||
description=_preferred_description(left.description, right.description),
|
||||
confidence=max(left.confidence, right.confidence),
|
||||
source_refs=_merge_source_refs(left.source_refs, right.source_refs),
|
||||
primary_class=_preferred_text(left.primary_class, right.primary_class),
|
||||
attributes=_merge_strings(left.attributes, right.attributes),
|
||||
capabilities=_merge_capabilities(left.capabilities + right.capabilities),
|
||||
)
|
||||
|
||||
@@ -107,6 +109,8 @@ def _combine_capabilities(
|
||||
outputs=_merge_strings(left.outputs, right.outputs),
|
||||
confidence=max(left.confidence, right.confidence),
|
||||
source_refs=_merge_source_refs(left.source_refs, right.source_refs),
|
||||
primary_class=_preferred_text(left.primary_class, right.primary_class),
|
||||
attributes=_merge_strings(left.attributes, right.attributes),
|
||||
features=_merge_features(left.features + right.features),
|
||||
evidence=_merge_evidence(left.evidence + right.evidence),
|
||||
)
|
||||
@@ -128,6 +132,8 @@ def _merge_features(
|
||||
location=_preferred_text(existing.location, feature.location),
|
||||
confidence=max(existing.confidence, feature.confidence),
|
||||
source_refs=_merge_source_refs(existing.source_refs, feature.source_refs),
|
||||
primary_class=_preferred_text(existing.primary_class, feature.primary_class),
|
||||
attributes=_merge_strings(existing.attributes, feature.attributes),
|
||||
)
|
||||
return merged
|
||||
|
||||
|
||||
Reference in New Issue
Block a user