generated from coulomb/repo-seed
baseline repo characteristics no longer crowd the candidate graph
This commit is contained in:
@@ -100,10 +100,6 @@ class CandidateGraphGenerator:
|
||||
)
|
||||
|
||||
capabilities: list[CandidateCapabilityDraft] = []
|
||||
if interfaces:
|
||||
capabilities.append(
|
||||
self._interface_capability(interfaces, tests, examples, docs, chunks)
|
||||
)
|
||||
capabilities.extend(
|
||||
self._intent_capabilities(intent_facts, chunks, tests, examples, docs)
|
||||
)
|
||||
@@ -127,31 +123,9 @@ class CandidateGraphGenerator:
|
||||
docs,
|
||||
)
|
||||
)
|
||||
if manifests or frameworks or languages:
|
||||
if interfaces and not capabilities:
|
||||
capabilities.append(
|
||||
CandidateCapabilityDraft(
|
||||
name="Describe Repository Structure",
|
||||
description=(
|
||||
"Summarize detected languages, package manifests, and framework "
|
||||
"hints as structural context for review."
|
||||
),
|
||||
inputs=[],
|
||||
outputs=["repository structure summary"],
|
||||
confidence=self._structure_confidence(
|
||||
manifests=manifests,
|
||||
frameworks=frameworks,
|
||||
languages=languages,
|
||||
docs=docs,
|
||||
),
|
||||
source_refs=self._source_refs(manifests + frameworks + languages),
|
||||
primary_class="repository-structure",
|
||||
attributes=self._structure_attributes(
|
||||
manifests,
|
||||
frameworks,
|
||||
languages,
|
||||
),
|
||||
evidence=self._evidence(tests, examples, docs),
|
||||
)
|
||||
self._interface_capability(interfaces, tests, examples, docs, chunks)
|
||||
)
|
||||
|
||||
return [
|
||||
@@ -356,7 +330,10 @@ class CandidateGraphGenerator:
|
||||
continue
|
||||
if line.startswith("#"):
|
||||
heading = line.lstrip("#").strip().lower()
|
||||
in_capability_section = "capabilit" in heading
|
||||
in_capability_section = (
|
||||
"capabilit" in heading
|
||||
or heading in {"primary utility", "core utility"}
|
||||
)
|
||||
continue
|
||||
if not in_capability_section:
|
||||
continue
|
||||
@@ -367,11 +344,23 @@ class CandidateGraphGenerator:
|
||||
return items
|
||||
|
||||
def _intent_capability_name(self, text: str) -> str:
    """Derive a short capability name from one line of intent text.

    Three known phrasings map to fixed names. Any other text is cut down to
    its leading clause (up to the first " - ", ":" or sentence end), its
    first word is passed through ``self._imperative_verb``, dangling
    trailing filler words are dropped, and at most ten words are handed to
    ``self._title_from_words``.

    Args:
        text: Raw intent text; may contain Markdown emphasis markers.

    Returns:
        The capability name, or ``""`` when no leading clause remains after
        stripping punctuation.
    """
    # Markdown emphasis/code markers would break the phrase checks below.
    lowered = re.sub(r"[*_`]", "", text.lower())
    if "continuous connectivity" in lowered and "remote systems" in lowered:
        return "Maintain Continuous Connectivity Between Remote Systems And Central Hub"
    if "observable" in lowered and "auditable" in lowered and "controllable" in lowered:
        return "Make Connectivity Observable Auditable And Controllable"
    if "cli tool" in lowered and "mcp" in lowered:
        return "Expose CLI And MCP Accessible Service"

    # Keep only the leading clause of the original (un-lowered) text.
    candidate = re.split(r"\s+-\s+|\s*:\s*|[.!?]\s+", text.strip(), maxsplit=1)[0]
    candidate = candidate.strip(" .:-")
    if not candidate:
        return ""

    # The former early return on the first eight words was removed here: it
    # made the verb normalisation and trailing-word trimming unreachable.
    words = candidate.split()
    if words:
        words[0] = self._imperative_verb(words[0])
    trailing_fillers = {"a", "an", "the", "and", "or", "as", "both"}
    # A name must not end on a dangling article or conjunction.
    while words and words[-1].lower().strip(",;:") in trailing_fillers:
        words.pop()
    return self._title_from_words(words[:10])
|
||||
|
||||
def _interface_features(
|
||||
self,
|
||||
@@ -508,16 +497,36 @@ class CandidateGraphGenerator:
|
||||
[
|
||||
repository.name,
|
||||
repository.description or "",
|
||||
" ".join(chunk.text[:600] for chunk in chunks if chunk.kind == "documentation"),
|
||||
" ".join(f"{fact.kind} {fact.name} {fact.value}" for fact in facts),
|
||||
" ".join(
|
||||
chunk.text[:600]
|
||||
for chunk in chunks
|
||||
if chunk.kind in {"intent", "documentation"}
|
||||
and chunk.metadata.get("source_role") != "agent_guidance"
|
||||
),
|
||||
" ".join(
|
||||
f"{fact.kind} {fact.name} {fact.value}"
|
||||
for fact in facts
|
||||
if not (
|
||||
fact.kind == "llm_provider"
|
||||
and self._utility_relationship(fact) not in {"owned", "facade", "adapter"}
|
||||
)
|
||||
),
|
||||
]
|
||||
).lower()
|
||||
attributes: list[str] = []
|
||||
if any(token in text for token in ("repository", "repo", "registry")):
|
||||
attributes.append("repository")
|
||||
if any(token in text for token in ("ssh", "tunnel", "reverse tunnel", "remote access", "connectivity")):
|
||||
attributes.extend(["remote-access", "connectivity"])
|
||||
if any(token in text for token in ("audit", "health check", "lifecycle", "ops", "operator")):
|
||||
attributes.append("operations")
|
||||
return "it-operations", self._unique(attributes)
|
||||
if any(token in text for token in ("ability", "capability", "feature")):
|
||||
return "repository-intelligence", self._unique(attributes + ["capability-mapping"])
|
||||
if any(token in text for token in ("llm", "openrouter", "claude", "model provider")):
|
||||
promotable_llm = any(
|
||||
fact.kind == "llm_provider"
|
||||
and self._utility_relationship(fact) in {"owned", "facade", "adapter"}
|
||||
for fact in facts
|
||||
)
|
||||
if promotable_llm:
|
||||
return "ai-integration", self._unique(attributes + ["llm-provider"])
|
||||
if any(fact.kind == "interface" for fact in facts):
|
||||
attributes.append("interface")
|
||||
@@ -777,6 +786,9 @@ class CandidateGraphGenerator:
|
||||
repository: Repository,
|
||||
chunks: list[ContentChunk],
|
||||
) -> str:
|
||||
ops_name = self._operations_ability_name(chunks)
|
||||
if ops_name:
|
||||
return ops_name
|
||||
purpose_text = self._document_purpose_sentence(chunks) or repository.description
|
||||
if purpose_text:
|
||||
normalized = self._imperative_purpose(purpose_text)
|
||||
@@ -794,9 +806,24 @@ class CandidateGraphGenerator:
|
||||
return paragraph
|
||||
return ""
|
||||
|
||||
def _operations_ability_name(self, chunks: list[ContentChunk]) -> str:
    """Return a fixed ability name when intent text mentions SSH reverse tunnels.

    The texts of the chunks yielded by ``self._documentation_chunks(chunks)``
    whose ``kind`` is ``"intent"`` are joined with spaces and lower-cased.
    If the result contains one of the SSH reverse-tunnel phrases, the fixed
    name is returned; otherwise the empty string.
    """
    intent_texts = [
        chunk.text
        for chunk in self._documentation_chunks(chunks)
        if chunk.kind == "intent"
    ]
    haystack = " ".join(intent_texts).lower()
    # The second phrase contains the first as a substring; both are kept so
    # the matched phrases mirror the original check exactly.
    phrases = ("ssh reverse tunnel", "ssh reverse tunneling")
    if any(phrase in haystack for phrase in phrases):
        return "Manage SSH Reverse Tunnel Connectivity"
    return ""
|
||||
|
||||
def _imperative_purpose(self, text: str) -> str:
|
||||
cleaned = re.sub(r"\s+", " ", text.strip())
|
||||
cleaned = re.split(r"[.!?]\s+", cleaned, maxsplit=1)[0]
|
||||
cleaned = re.sub(
|
||||
r"(?i)^this\s+repository\s+exists\s+to\s+provide\s+(?:an?\s+)?",
|
||||
"Provide ",
|
||||
cleaned,
|
||||
)
|
||||
cleaned = re.sub(r"^[A-Z][A-Za-z0-9_-]*\s+(?:is|provides|offers)\s+", "", cleaned)
|
||||
cleaned = cleaned.strip(" .:-")
|
||||
if not cleaned:
|
||||
@@ -816,6 +843,8 @@ class CandidateGraphGenerator:
|
||||
}
|
||||
if lower in irregular:
|
||||
return irregular[lower]
|
||||
if lower in {"this"}:
|
||||
return lower
|
||||
if lower.endswith("ies") and len(lower) > 4:
|
||||
return f"{lower[:-3]}y"
|
||||
if lower.endswith(("des", "ses", "tes", "ves", "zes")) and len(lower) > 4:
|
||||
|
||||
Reference in New Issue
Block a user