generated from coulomb/repo-seed
baseline repo characteristics no longer crowd the candidate graph
This commit is contained in:
@@ -100,10 +100,6 @@ class CandidateGraphGenerator:
|
||||
)
|
||||
|
||||
capabilities: list[CandidateCapabilityDraft] = []
|
||||
if interfaces:
|
||||
capabilities.append(
|
||||
self._interface_capability(interfaces, tests, examples, docs, chunks)
|
||||
)
|
||||
capabilities.extend(
|
||||
self._intent_capabilities(intent_facts, chunks, tests, examples, docs)
|
||||
)
|
||||
@@ -127,31 +123,9 @@ class CandidateGraphGenerator:
|
||||
docs,
|
||||
)
|
||||
)
|
||||
if manifests or frameworks or languages:
|
||||
if interfaces and not capabilities:
|
||||
capabilities.append(
|
||||
CandidateCapabilityDraft(
|
||||
name="Describe Repository Structure",
|
||||
description=(
|
||||
"Summarize detected languages, package manifests, and framework "
|
||||
"hints as structural context for review."
|
||||
),
|
||||
inputs=[],
|
||||
outputs=["repository structure summary"],
|
||||
confidence=self._structure_confidence(
|
||||
manifests=manifests,
|
||||
frameworks=frameworks,
|
||||
languages=languages,
|
||||
docs=docs,
|
||||
),
|
||||
source_refs=self._source_refs(manifests + frameworks + languages),
|
||||
primary_class="repository-structure",
|
||||
attributes=self._structure_attributes(
|
||||
manifests,
|
||||
frameworks,
|
||||
languages,
|
||||
),
|
||||
evidence=self._evidence(tests, examples, docs),
|
||||
)
|
||||
self._interface_capability(interfaces, tests, examples, docs, chunks)
|
||||
)
|
||||
|
||||
return [
|
||||
@@ -356,7 +330,10 @@ class CandidateGraphGenerator:
|
||||
continue
|
||||
if line.startswith("#"):
|
||||
heading = line.lstrip("#").strip().lower()
|
||||
in_capability_section = "capabilit" in heading
|
||||
in_capability_section = (
|
||||
"capabilit" in heading
|
||||
or heading in {"primary utility", "core utility"}
|
||||
)
|
||||
continue
|
||||
if not in_capability_section:
|
||||
continue
|
||||
@@ -367,11 +344,23 @@ class CandidateGraphGenerator:
|
||||
return items
|
||||
|
||||
def _intent_capability_name(self, text: str) -> str:
|
||||
lowered = re.sub(r"[*_`]", "", text.lower())
|
||||
if "continuous connectivity" in lowered and "remote systems" in lowered:
|
||||
return "Maintain Continuous Connectivity Between Remote Systems And Central Hub"
|
||||
if "observable" in lowered and "auditable" in lowered and "controllable" in lowered:
|
||||
return "Make Connectivity Observable Auditable And Controllable"
|
||||
if "cli tool" in lowered and "mcp" in lowered:
|
||||
return "Expose CLI And MCP Accessible Service"
|
||||
candidate = re.split(r"\s+-\s+|\s*:\s*|[.!?]\s+", text.strip(), maxsplit=1)[0]
|
||||
candidate = candidate.strip(" .:-")
|
||||
if not candidate:
|
||||
return ""
|
||||
return self._title_from_words(candidate.split()[:8])
|
||||
words = candidate.split()
|
||||
if words:
|
||||
words[0] = self._imperative_verb(words[0])
|
||||
while words and words[-1].lower().strip(",;:") in {"a", "an", "the", "and", "or", "as", "both"}:
|
||||
words.pop()
|
||||
return self._title_from_words(words[:10])
|
||||
|
||||
def _interface_features(
|
||||
self,
|
||||
@@ -508,16 +497,36 @@ class CandidateGraphGenerator:
|
||||
[
|
||||
repository.name,
|
||||
repository.description or "",
|
||||
" ".join(chunk.text[:600] for chunk in chunks if chunk.kind == "documentation"),
|
||||
" ".join(f"{fact.kind} {fact.name} {fact.value}" for fact in facts),
|
||||
" ".join(
|
||||
chunk.text[:600]
|
||||
for chunk in chunks
|
||||
if chunk.kind in {"intent", "documentation"}
|
||||
and chunk.metadata.get("source_role") != "agent_guidance"
|
||||
),
|
||||
" ".join(
|
||||
f"{fact.kind} {fact.name} {fact.value}"
|
||||
for fact in facts
|
||||
if not (
|
||||
fact.kind == "llm_provider"
|
||||
and self._utility_relationship(fact) not in {"owned", "facade", "adapter"}
|
||||
)
|
||||
),
|
||||
]
|
||||
).lower()
|
||||
attributes: list[str] = []
|
||||
if any(token in text for token in ("repository", "repo", "registry")):
|
||||
attributes.append("repository")
|
||||
if any(token in text for token in ("ssh", "tunnel", "reverse tunnel", "remote access", "connectivity")):
|
||||
attributes.extend(["remote-access", "connectivity"])
|
||||
if any(token in text for token in ("audit", "health check", "lifecycle", "ops", "operator")):
|
||||
attributes.append("operations")
|
||||
return "it-operations", self._unique(attributes)
|
||||
if any(token in text for token in ("ability", "capability", "feature")):
|
||||
return "repository-intelligence", self._unique(attributes + ["capability-mapping"])
|
||||
if any(token in text for token in ("llm", "openrouter", "claude", "model provider")):
|
||||
promotable_llm = any(
|
||||
fact.kind == "llm_provider"
|
||||
and self._utility_relationship(fact) in {"owned", "facade", "adapter"}
|
||||
for fact in facts
|
||||
)
|
||||
if promotable_llm:
|
||||
return "ai-integration", self._unique(attributes + ["llm-provider"])
|
||||
if any(fact.kind == "interface" for fact in facts):
|
||||
attributes.append("interface")
|
||||
@@ -777,6 +786,9 @@ class CandidateGraphGenerator:
|
||||
repository: Repository,
|
||||
chunks: list[ContentChunk],
|
||||
) -> str:
|
||||
ops_name = self._operations_ability_name(chunks)
|
||||
if ops_name:
|
||||
return ops_name
|
||||
purpose_text = self._document_purpose_sentence(chunks) or repository.description
|
||||
if purpose_text:
|
||||
normalized = self._imperative_purpose(purpose_text)
|
||||
@@ -794,9 +806,24 @@ class CandidateGraphGenerator:
|
||||
return paragraph
|
||||
return ""
|
||||
|
||||
def _operations_ability_name(self, chunks: list[ContentChunk]) -> str:
|
||||
text = " ".join(
|
||||
chunk.text
|
||||
for chunk in self._documentation_chunks(chunks)
|
||||
if chunk.kind == "intent"
|
||||
).lower()
|
||||
if "ssh reverse tunnel" in text or "ssh reverse tunneling" in text:
|
||||
return "Manage SSH Reverse Tunnel Connectivity"
|
||||
return ""
|
||||
|
||||
def _imperative_purpose(self, text: str) -> str:
|
||||
cleaned = re.sub(r"\s+", " ", text.strip())
|
||||
cleaned = re.split(r"[.!?]\s+", cleaned, maxsplit=1)[0]
|
||||
cleaned = re.sub(
|
||||
r"(?i)^this\s+repository\s+exists\s+to\s+provide\s+(?:an?\s+)?",
|
||||
"Provide ",
|
||||
cleaned,
|
||||
)
|
||||
cleaned = re.sub(r"^[A-Z][A-Za-z0-9_-]*\s+(?:is|provides|offers)\s+", "", cleaned)
|
||||
cleaned = cleaned.strip(" .:-")
|
||||
if not cleaned:
|
||||
@@ -816,6 +843,8 @@ class CandidateGraphGenerator:
|
||||
}
|
||||
if lower in irregular:
|
||||
return irregular[lower]
|
||||
if lower in {"this"}:
|
||||
return lower
|
||||
if lower.endswith("ies") and len(lower) > 4:
|
||||
return f"{lower[:-3]}y"
|
||||
if lower.endswith(("des", "ses", "tes", "ves", "zes")) and len(lower) > 4:
|
||||
|
||||
@@ -474,7 +474,11 @@ class DeterministicScanner:
|
||||
return "ci_tooling"
|
||||
if lower.startswith(("tests/", "test/")) or name.startswith("test_"):
|
||||
return "test_evidence"
|
||||
if name.startswith("readme") or lower.startswith(("docs/", "doc/", "wiki/")):
|
||||
if (
|
||||
name.startswith("readme")
|
||||
or name.endswith(".md")
|
||||
or lower.startswith(("docs/", "doc/", "wiki/", "workplans/", "architecture/"))
|
||||
):
|
||||
return "product_documentation"
|
||||
if name in MANIFEST_FRAMEWORK_HINTS or name.endswith((".lock", ".mod")):
|
||||
return "dependency_declaration"
|
||||
@@ -483,13 +487,21 @@ class DeterministicScanner:
|
||||
return "implementation_source"
|
||||
|
||||
def _has_provider_signal(self, lower_text: str, needle: str) -> bool:
|
||||
pattern = re.compile(rf"(?<![a-z0-9-]){re.escape(needle.lower())}(?![a-z0-9-])")
|
||||
if f"{needle.lower()}_api_key" in lower_text:
|
||||
return True
|
||||
pattern = re.compile(rf"(?<![a-z0-9_-]){re.escape(needle.lower())}(?![a-z0-9_-])")
|
||||
for match in pattern.finditer(lower_text):
|
||||
context = lower_text[max(0, match.start() - 20) : match.end() + 20]
|
||||
if needle == "claude" and (
|
||||
"claude.md" in context
|
||||
or "claude code" in context
|
||||
or "claude.ai/code" in context
|
||||
or "claude mcp" in context
|
||||
or "mcp" in context
|
||||
or ".claude" in context
|
||||
or "claude.json" in context
|
||||
or "claude plugin" in context
|
||||
or "claude prompt" in context
|
||||
):
|
||||
continue
|
||||
return True
|
||||
|
||||
@@ -541,6 +541,19 @@ def render_analysis_diagnostics(
|
||||
),
|
||||
)
|
||||
)
|
||||
elif capability_count == 0:
|
||||
notices.append(
|
||||
(
|
||||
"warn",
|
||||
"No domain capabilities were produced.",
|
||||
(
|
||||
"The scanner found repository evidence, but only baseline "
|
||||
"context or weak documentation was available. If this "
|
||||
"repository should provide concrete capabilities, record an "
|
||||
"expectation gap for the missing behavior."
|
||||
),
|
||||
)
|
||||
)
|
||||
elif only_weak_candidates:
|
||||
notices.append(
|
||||
(
|
||||
|
||||
Reference in New Issue
Block a user