generated from coulomb/repo-seed
Implement scope-derived candidate review infrastructure
This commit is contained in:
@@ -275,6 +275,8 @@ class CandidateGraphGenerator:
|
||||
manifests = self._facts(facts, "manifest")
|
||||
frameworks = self._facts(facts, "framework")
|
||||
languages = self._facts(facts, "language")
|
||||
configs = self._facts(facts, "config")
|
||||
scope_facts = self._facts(facts, "scope")
|
||||
llm_providers = self._facts(facts, "llm_provider")
|
||||
credential_configs = self._facts(facts, "credential_config")
|
||||
provider_registries = self._facts(facts, "provider_registry")
|
||||
@@ -286,7 +288,7 @@ class CandidateGraphGenerator:
|
||||
chunks,
|
||||
)
|
||||
|
||||
ability_sources = docs or manifests or languages
|
||||
ability_sources = docs or scope_facts or manifests or languages or configs
|
||||
ability = CandidateAbilityDraft(
|
||||
name=self._ability_name(repository, chunks),
|
||||
description=self._ability_description(chunks),
|
||||
@@ -308,6 +310,15 @@ class CandidateGraphGenerator:
|
||||
capabilities.extend(
|
||||
self._intent_capabilities(intent_facts, chunks, tests, examples, docs)
|
||||
)
|
||||
capabilities.extend(
|
||||
self._scope_capabilities(
|
||||
scope_facts,
|
||||
chunks,
|
||||
tests,
|
||||
examples,
|
||||
allow_summary_fallback=not intent_facts,
|
||||
)
|
||||
)
|
||||
capabilities.extend(
|
||||
self._repo_scoping_native_capabilities(
|
||||
repository,
|
||||
@@ -347,6 +358,18 @@ class CandidateGraphGenerator:
|
||||
capabilities.append(
|
||||
self._interface_capability(interfaces, tests, examples, docs, chunks)
|
||||
)
|
||||
if not capabilities:
|
||||
capabilities.extend(
|
||||
self._fact_derived_capabilities(
|
||||
configs=configs,
|
||||
manifests=manifests,
|
||||
frameworks=frameworks,
|
||||
languages=languages,
|
||||
docs=docs,
|
||||
tests=tests,
|
||||
chunks=chunks,
|
||||
)
|
||||
)
|
||||
|
||||
return [
|
||||
CandidateAbilityDraft(
|
||||
@@ -582,6 +605,257 @@ class CandidateGraphGenerator:
|
||||
words.pop()
|
||||
return self._title_from_words(words[:10])
|
||||
|
||||
def _scope_capabilities(
|
||||
self,
|
||||
scope_facts: list[ObservedFact],
|
||||
chunks: list[ContentChunk],
|
||||
tests: list[ObservedFact],
|
||||
examples: list[ObservedFact],
|
||||
*,
|
||||
allow_summary_fallback: bool = True,
|
||||
) -> list[CandidateCapabilityDraft]:
|
||||
scope_chunks = [
|
||||
chunk
|
||||
for chunk in chunks
|
||||
if chunk.kind == "scope"
|
||||
or chunk.metadata.get("source_role") == "derived_scope"
|
||||
or chunk.path.lower().endswith("scope.md")
|
||||
]
|
||||
if not scope_chunks:
|
||||
return []
|
||||
source_refs = self._source_refs(scope_facts)
|
||||
capabilities: list[CandidateCapabilityDraft] = []
|
||||
seen: set[str] = set()
|
||||
for block in self._scope_capability_blocks(scope_chunks):
|
||||
title = block.get("title", "").strip()
|
||||
if not title:
|
||||
continue
|
||||
key = title.lower()
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
capability_type = block.get("type", "scope-derived").strip() or "scope-derived"
|
||||
description = block.get("description", "").strip()
|
||||
keywords = self._scope_keywords(block.get("keywords", ""))
|
||||
attributes = self._unique(
|
||||
[
|
||||
capability_type,
|
||||
*keywords,
|
||||
"scope-derived",
|
||||
"current-state",
|
||||
"review-required-scope",
|
||||
]
|
||||
)
|
||||
feature = CandidateFeatureDraft(
|
||||
name=title,
|
||||
type=capability_type,
|
||||
location="SCOPE.md",
|
||||
confidence=0.55,
|
||||
source_refs=source_refs,
|
||||
primary_class=capability_type,
|
||||
attributes=self._unique(
|
||||
[capability_type, "scope-defined", "review-required-scope"]
|
||||
),
|
||||
)
|
||||
capabilities.append(
|
||||
CandidateCapabilityDraft(
|
||||
name=title,
|
||||
description=(
|
||||
"Reviewable current-state capability extracted from "
|
||||
f"SCOPE.md: {description or title}"
|
||||
),
|
||||
inputs=[],
|
||||
outputs=[title],
|
||||
confidence=self._confidence(
|
||||
0.45,
|
||||
[
|
||||
(0.10, bool(description)),
|
||||
(0.05, bool(keywords)),
|
||||
(0.05, bool(tests)),
|
||||
(0.05, bool(examples)),
|
||||
],
|
||||
),
|
||||
source_refs=source_refs,
|
||||
primary_class=capability_type,
|
||||
attributes=attributes,
|
||||
features=[feature],
|
||||
evidence=[
|
||||
CandidateEvidenceDraft(
|
||||
type="scope-current-state",
|
||||
reference="SCOPE.md",
|
||||
strength="medium",
|
||||
source_refs=source_refs,
|
||||
)
|
||||
],
|
||||
)
|
||||
)
|
||||
if capabilities or not allow_summary_fallback:
|
||||
return capabilities
|
||||
fallback_name = self._scope_summary_capability_name(scope_chunks)
|
||||
if not fallback_name:
|
||||
return []
|
||||
return [
|
||||
CandidateCapabilityDraft(
|
||||
name=fallback_name,
|
||||
description=(
|
||||
"Reviewable current-state capability inferred from SCOPE.md "
|
||||
"summary text. A curator should split this into more precise "
|
||||
"capabilities when reviewing."
|
||||
),
|
||||
inputs=[],
|
||||
outputs=[fallback_name],
|
||||
confidence=0.45,
|
||||
source_refs=source_refs,
|
||||
primary_class="scope-derived",
|
||||
attributes=[
|
||||
"scope-derived",
|
||||
"current-state",
|
||||
"review-required-scope",
|
||||
],
|
||||
evidence=[
|
||||
CandidateEvidenceDraft(
|
||||
type="scope-current-state",
|
||||
reference="SCOPE.md",
|
||||
strength="weak",
|
||||
source_refs=source_refs,
|
||||
)
|
||||
],
|
||||
)
|
||||
]
|
||||
|
||||
def _scope_capability_blocks(
|
||||
self,
|
||||
chunks: list[ContentChunk],
|
||||
) -> list[dict[str, str]]:
|
||||
blocks: list[dict[str, str]] = []
|
||||
in_block = False
|
||||
current: dict[str, str] = {}
|
||||
current_key = ""
|
||||
for chunk in sorted(chunks, key=lambda item: (item.path, item.start_line)):
|
||||
for raw_line in chunk.text.splitlines():
|
||||
line = raw_line.rstrip()
|
||||
stripped = line.strip()
|
||||
if stripped.startswith("```capability"):
|
||||
in_block = True
|
||||
current = {}
|
||||
current_key = ""
|
||||
continue
|
||||
if in_block and stripped.startswith("```"):
|
||||
if current:
|
||||
blocks.append(current)
|
||||
in_block = False
|
||||
current = {}
|
||||
current_key = ""
|
||||
continue
|
||||
if not in_block:
|
||||
continue
|
||||
key, separator, value = stripped.partition(":")
|
||||
if separator and re.match(r"^[A-Za-z_][A-Za-z0-9_-]*$", key):
|
||||
current_key = key.lower()
|
||||
current[current_key] = value.strip().strip('"')
|
||||
elif current_key and stripped:
|
||||
current[current_key] = (
|
||||
f"{current[current_key]} {stripped.strip()}"
|
||||
).strip()
|
||||
return blocks
|
||||
|
||||
def _scope_keywords(self, value: str) -> list[str]:
|
||||
cleaned = value.strip()
|
||||
if cleaned.startswith("[") and cleaned.endswith("]"):
|
||||
cleaned = cleaned[1:-1]
|
||||
return [
|
||||
item.strip(" `\"'")
|
||||
for item in cleaned.split(",")
|
||||
if item.strip(" `\"'")
|
||||
][:8]
|
||||
|
||||
def _scope_summary_capability_name(self, chunks: list[ContentChunk]) -> str:
|
||||
one_liner = self._scope_one_liner(chunks)
|
||||
if one_liner:
|
||||
return self._imperative_purpose(one_liner)
|
||||
return ""
|
||||
|
||||
def _fact_derived_capabilities(
|
||||
self,
|
||||
*,
|
||||
configs: list[ObservedFact],
|
||||
manifests: list[ObservedFact],
|
||||
frameworks: list[ObservedFact],
|
||||
languages: list[ObservedFact],
|
||||
docs: list[ObservedFact],
|
||||
tests: list[ObservedFact],
|
||||
chunks: list[ContentChunk],
|
||||
) -> list[CandidateCapabilityDraft]:
|
||||
if not configs:
|
||||
return []
|
||||
capability_facts = configs + manifests + frameworks + languages
|
||||
if not capability_facts:
|
||||
return []
|
||||
features: list[CandidateFeatureDraft] = []
|
||||
for label, kind, facts in (
|
||||
("Manage Repository Configuration", "configuration", configs),
|
||||
("Declare Runtime And Package Manifests", "manifest", manifests),
|
||||
("Use Detected Frameworks", "framework", frameworks),
|
||||
("Provide Implementation In Detected Languages", "implementation", languages),
|
||||
):
|
||||
if not facts:
|
||||
continue
|
||||
features.append(
|
||||
CandidateFeatureDraft(
|
||||
name=label,
|
||||
type=kind,
|
||||
location=self._grouped_location(facts),
|
||||
confidence=0.45,
|
||||
source_refs=self._source_refs(facts),
|
||||
primary_class=kind,
|
||||
attributes=[kind, "fact-derived", "review-required"],
|
||||
)
|
||||
)
|
||||
if not features:
|
||||
return []
|
||||
name = self._fact_derived_capability_name(chunks, features)
|
||||
return [
|
||||
CandidateCapabilityDraft(
|
||||
name=name,
|
||||
description=(
|
||||
"Reviewable capability inferred from deterministic facts. "
|
||||
"This fills the hierarchy when no stronger intent, scope "
|
||||
"capability, or interface candidate exists."
|
||||
),
|
||||
inputs=self._feature_inputs(features),
|
||||
outputs=self._feature_outputs(features),
|
||||
confidence=self._confidence(
|
||||
0.35,
|
||||
[
|
||||
(0.10, bool(configs)),
|
||||
(0.10, bool(manifests)),
|
||||
(0.05, bool(frameworks)),
|
||||
(0.05, bool(tests)),
|
||||
(0.05, bool(docs)),
|
||||
],
|
||||
),
|
||||
source_refs=self._source_refs(capability_facts),
|
||||
primary_class="fact-derived",
|
||||
attributes=["fact-derived", "review-required", "partial-hierarchy"],
|
||||
features=features,
|
||||
evidence=self._evidence(tests, [], docs),
|
||||
)
|
||||
]
|
||||
|
||||
def _fact_derived_capability_name(
|
||||
self,
|
||||
chunks: list[ContentChunk],
|
||||
features: list[CandidateFeatureDraft],
|
||||
) -> str:
|
||||
scope_name = self._scope_summary_capability_name(chunks)
|
||||
if scope_name:
|
||||
return scope_name
|
||||
if any(feature.type == "configuration" for feature in features):
|
||||
return "Manage Repository Configuration"
|
||||
if any(feature.type == "manifest" for feature in features):
|
||||
return "Declare Repository Runtime"
|
||||
return "Describe Repository Implementation"
|
||||
|
||||
def _repo_scoping_native_capabilities(
|
||||
self,
|
||||
repository: Repository,
|
||||
@@ -1219,40 +1493,110 @@ class CandidateGraphGenerator:
|
||||
ops_name = self._operations_ability_name(chunks)
|
||||
if ops_name:
|
||||
return ops_name
|
||||
purpose_text = self._document_purpose_sentence(chunks) or repository.description
|
||||
purpose_text = (
|
||||
self._intent_purpose_sentence(chunks)
|
||||
or self._scope_one_liner(chunks)
|
||||
or self._documentation_purpose_sentence(chunks)
|
||||
or repository.description
|
||||
)
|
||||
if purpose_text:
|
||||
normalized = self._imperative_purpose(purpose_text)
|
||||
if normalized:
|
||||
return normalized
|
||||
return f"Support {self._humanize_identifier(repository.name)}"
|
||||
|
||||
def _document_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
|
||||
for chunk in self._purpose_chunks(chunks):
|
||||
def _intent_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
|
||||
return self._purpose_sentence_for_chunks(
|
||||
[
|
||||
chunk
|
||||
for chunk in self._purpose_chunks(chunks)
|
||||
if chunk.kind == "intent"
|
||||
or chunk.metadata.get("source_role") == "intent_summary"
|
||||
or chunk.path.lower().endswith("intent.md")
|
||||
]
|
||||
)
|
||||
|
||||
def _documentation_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
|
||||
return self._purpose_sentence_for_chunks(
|
||||
[
|
||||
chunk
|
||||
for chunk in self._purpose_chunks(chunks)
|
||||
if chunk.kind == "documentation"
|
||||
and chunk.metadata.get("source_role") != "derived_scope"
|
||||
and not chunk.path.lower().endswith("scope.md")
|
||||
]
|
||||
)
|
||||
|
||||
def _purpose_sentence_for_chunks(self, chunks: list[ContentChunk]) -> str:
|
||||
for chunk in chunks:
|
||||
if chunk.kind not in {"intent", "documentation"}:
|
||||
continue
|
||||
lines = [line.strip() for line in chunk.text.splitlines() if line.strip()]
|
||||
paragraph = next((line for line in lines if not line.startswith("#")), "")
|
||||
if paragraph:
|
||||
if paragraph and not self._is_template_boilerplate(paragraph):
|
||||
return paragraph
|
||||
return ""
|
||||
|
||||
def _scope_one_liner(self, chunks: list[ContentChunk]) -> str:
|
||||
for chunk in sorted(chunks, key=lambda item: (item.path, item.start_line)):
|
||||
if not (
|
||||
chunk.kind == "scope"
|
||||
or chunk.metadata.get("source_role") == "derived_scope"
|
||||
or chunk.path.lower().endswith("scope.md")
|
||||
):
|
||||
continue
|
||||
lines = chunk.text.splitlines()
|
||||
for index, raw_line in enumerate(lines):
|
||||
if raw_line.strip().lower() == "## one-liner":
|
||||
for following in lines[index + 1 :]:
|
||||
candidate = following.strip()
|
||||
if not candidate or candidate.startswith("---"):
|
||||
continue
|
||||
if candidate.startswith(">"):
|
||||
continue
|
||||
return candidate.strip(" .")
|
||||
before_first_section: list[str] = []
|
||||
for raw_line in lines:
|
||||
candidate = raw_line.strip()
|
||||
if candidate.startswith("## "):
|
||||
break
|
||||
before_first_section.append(candidate)
|
||||
for candidate in before_first_section:
|
||||
if (
|
||||
candidate
|
||||
and not candidate.startswith("#")
|
||||
and not candidate.startswith(">")
|
||||
and not candidate.startswith("---")
|
||||
and not self._is_template_boilerplate(candidate)
|
||||
):
|
||||
return candidate.strip(" .")
|
||||
return ""
|
||||
|
||||
def _is_template_boilerplate(self, text: str) -> bool:
|
||||
lowered = text.lower()
|
||||
return (
|
||||
"git repository template to bootstrap" in lowered
|
||||
or "this file helps you quickly understand" in lowered
|
||||
or "intentionally lightweight and may be incomplete" in lowered
|
||||
)
|
||||
|
||||
def _purpose_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]:
|
||||
def priority(chunk: ContentChunk) -> tuple[int, str, int]:
|
||||
role = chunk.metadata.get("source_role")
|
||||
path = chunk.path.lower()
|
||||
if role == "intent_summary" or path.endswith("intent.md"):
|
||||
return (0, path, chunk.start_line)
|
||||
if role == "product_documentation" or path.startswith("readme"):
|
||||
return (1, path, chunk.start_line)
|
||||
if role == "derived_scope" or path.endswith("scope.md"):
|
||||
return (3, path, chunk.start_line)
|
||||
return (2, path, chunk.start_line)
|
||||
return (1, path, chunk.start_line)
|
||||
if role == "product_documentation" or path.startswith("readme"):
|
||||
return (2, path, chunk.start_line)
|
||||
return (3, path, chunk.start_line)
|
||||
|
||||
return sorted(
|
||||
[
|
||||
chunk
|
||||
for chunk in chunks
|
||||
if chunk.kind in {"intent", "documentation"}
|
||||
if chunk.kind in {"intent", "documentation", "scope"}
|
||||
and chunk.metadata.get("source_role") != "agent_guidance"
|
||||
],
|
||||
key=priority,
|
||||
@@ -1284,9 +1628,11 @@ class CandidateGraphGenerator:
|
||||
if not words:
|
||||
return ""
|
||||
words[0] = self._imperative_verb(words[0])
|
||||
return self._title_from_words(words[:8])
|
||||
return self._title_from_words(words[:10])
|
||||
|
||||
def _imperative_verb(self, word: str) -> str:
|
||||
if word.isupper():
|
||||
return word
|
||||
lower = word.lower().strip(",;:")
|
||||
irregular = {
|
||||
"does": "do",
|
||||
@@ -1313,7 +1659,7 @@ class CandidateGraphGenerator:
|
||||
for word in words
|
||||
]
|
||||
return " ".join(
|
||||
word[:1].upper() + word[1:]
|
||||
word if word.isupper() else word[:1].upper() + word[1:]
|
||||
for word in cleaned_words
|
||||
if word
|
||||
)
|
||||
@@ -1341,17 +1687,37 @@ class CandidateGraphGenerator:
|
||||
lines = [line.strip() for line in chunk.text.splitlines() if line.strip()]
|
||||
if not lines:
|
||||
continue
|
||||
if chunk.kind == "scope" or chunk.metadata.get("source_role") == "derived_scope":
|
||||
one_liner = self._scope_one_liner([chunk])
|
||||
if one_liner:
|
||||
return f"SCOPE. {one_liner}"
|
||||
heading = next((line.lstrip("#").strip() for line in lines if line.startswith("#")), "")
|
||||
paragraph = next((line for line in lines if not line.startswith("#")), "")
|
||||
if self._is_template_boilerplate(paragraph):
|
||||
paragraph = ""
|
||||
if heading and paragraph:
|
||||
return f"{heading}. {paragraph}"
|
||||
return heading or paragraph
|
||||
return ""
|
||||
|
||||
def _documentation_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]:
|
||||
def priority(chunk: ContentChunk) -> tuple[int, str, int]:
|
||||
role = chunk.metadata.get("source_role")
|
||||
path = chunk.path.lower()
|
||||
if chunk.kind == "intent" or role == "intent_summary" or path.endswith("intent.md"):
|
||||
return (0, path, chunk.start_line)
|
||||
if chunk.kind == "scope" or role == "derived_scope" or path.endswith("scope.md"):
|
||||
return (1, path, chunk.start_line)
|
||||
return (2, path, chunk.start_line)
|
||||
|
||||
return sorted(
|
||||
[chunk for chunk in chunks if chunk.kind in {"intent", "documentation"}],
|
||||
key=lambda chunk: (0 if chunk.kind == "intent" else 1, chunk.path, chunk.start_line),
|
||||
[
|
||||
chunk
|
||||
for chunk in chunks
|
||||
if chunk.kind in {"intent", "documentation", "scope"}
|
||||
and chunk.metadata.get("source_role") != "agent_guidance"
|
||||
],
|
||||
key=priority,
|
||||
)
|
||||
|
||||
def _interface_summary(self, chunks: list[ContentChunk]) -> str:
|
||||
|
||||
Reference in New Issue
Block a user