Implement scope-derived candidate review infrastructure

This commit is contained in:
2026-05-16 00:26:29 +02:00
parent f4d782c997
commit ba2228e889
14 changed files with 1740 additions and 39 deletions

View File

@@ -275,6 +275,8 @@ class CandidateGraphGenerator:
manifests = self._facts(facts, "manifest")
frameworks = self._facts(facts, "framework")
languages = self._facts(facts, "language")
configs = self._facts(facts, "config")
scope_facts = self._facts(facts, "scope")
llm_providers = self._facts(facts, "llm_provider")
credential_configs = self._facts(facts, "credential_config")
provider_registries = self._facts(facts, "provider_registry")
@@ -286,7 +288,7 @@ class CandidateGraphGenerator:
chunks,
)
ability_sources = docs or manifests or languages
ability_sources = docs or scope_facts or manifests or languages or configs
ability = CandidateAbilityDraft(
name=self._ability_name(repository, chunks),
description=self._ability_description(chunks),
@@ -308,6 +310,15 @@ class CandidateGraphGenerator:
capabilities.extend(
self._intent_capabilities(intent_facts, chunks, tests, examples, docs)
)
capabilities.extend(
self._scope_capabilities(
scope_facts,
chunks,
tests,
examples,
allow_summary_fallback=not intent_facts,
)
)
capabilities.extend(
self._repo_scoping_native_capabilities(
repository,
@@ -347,6 +358,18 @@ class CandidateGraphGenerator:
capabilities.append(
self._interface_capability(interfaces, tests, examples, docs, chunks)
)
if not capabilities:
capabilities.extend(
self._fact_derived_capabilities(
configs=configs,
manifests=manifests,
frameworks=frameworks,
languages=languages,
docs=docs,
tests=tests,
chunks=chunks,
)
)
return [
CandidateAbilityDraft(
@@ -582,6 +605,257 @@ class CandidateGraphGenerator:
words.pop()
return self._title_from_words(words[:10])
def _scope_capabilities(
self,
scope_facts: list[ObservedFact],
chunks: list[ContentChunk],
tests: list[ObservedFact],
examples: list[ObservedFact],
*,
allow_summary_fallback: bool = True,
) -> list[CandidateCapabilityDraft]:
scope_chunks = [
chunk
for chunk in chunks
if chunk.kind == "scope"
or chunk.metadata.get("source_role") == "derived_scope"
or chunk.path.lower().endswith("scope.md")
]
if not scope_chunks:
return []
source_refs = self._source_refs(scope_facts)
capabilities: list[CandidateCapabilityDraft] = []
seen: set[str] = set()
for block in self._scope_capability_blocks(scope_chunks):
title = block.get("title", "").strip()
if not title:
continue
key = title.lower()
if key in seen:
continue
seen.add(key)
capability_type = block.get("type", "scope-derived").strip() or "scope-derived"
description = block.get("description", "").strip()
keywords = self._scope_keywords(block.get("keywords", ""))
attributes = self._unique(
[
capability_type,
*keywords,
"scope-derived",
"current-state",
"review-required-scope",
]
)
feature = CandidateFeatureDraft(
name=title,
type=capability_type,
location="SCOPE.md",
confidence=0.55,
source_refs=source_refs,
primary_class=capability_type,
attributes=self._unique(
[capability_type, "scope-defined", "review-required-scope"]
),
)
capabilities.append(
CandidateCapabilityDraft(
name=title,
description=(
"Reviewable current-state capability extracted from "
f"SCOPE.md: {description or title}"
),
inputs=[],
outputs=[title],
confidence=self._confidence(
0.45,
[
(0.10, bool(description)),
(0.05, bool(keywords)),
(0.05, bool(tests)),
(0.05, bool(examples)),
],
),
source_refs=source_refs,
primary_class=capability_type,
attributes=attributes,
features=[feature],
evidence=[
CandidateEvidenceDraft(
type="scope-current-state",
reference="SCOPE.md",
strength="medium",
source_refs=source_refs,
)
],
)
)
if capabilities or not allow_summary_fallback:
return capabilities
fallback_name = self._scope_summary_capability_name(scope_chunks)
if not fallback_name:
return []
return [
CandidateCapabilityDraft(
name=fallback_name,
description=(
"Reviewable current-state capability inferred from SCOPE.md "
"summary text. A curator should split this into more precise "
"capabilities when reviewing."
),
inputs=[],
outputs=[fallback_name],
confidence=0.45,
source_refs=source_refs,
primary_class="scope-derived",
attributes=[
"scope-derived",
"current-state",
"review-required-scope",
],
evidence=[
CandidateEvidenceDraft(
type="scope-current-state",
reference="SCOPE.md",
strength="weak",
source_refs=source_refs,
)
],
)
]
def _scope_capability_blocks(
self,
chunks: list[ContentChunk],
) -> list[dict[str, str]]:
blocks: list[dict[str, str]] = []
in_block = False
current: dict[str, str] = {}
current_key = ""
for chunk in sorted(chunks, key=lambda item: (item.path, item.start_line)):
for raw_line in chunk.text.splitlines():
line = raw_line.rstrip()
stripped = line.strip()
if stripped.startswith("```capability"):
in_block = True
current = {}
current_key = ""
continue
if in_block and stripped.startswith("```"):
if current:
blocks.append(current)
in_block = False
current = {}
current_key = ""
continue
if not in_block:
continue
key, separator, value = stripped.partition(":")
if separator and re.match(r"^[A-Za-z_][A-Za-z0-9_-]*$", key):
current_key = key.lower()
current[current_key] = value.strip().strip('"')
elif current_key and stripped:
current[current_key] = (
f"{current[current_key]} {stripped.strip()}"
).strip()
return blocks
def _scope_keywords(self, value: str) -> list[str]:
cleaned = value.strip()
if cleaned.startswith("[") and cleaned.endswith("]"):
cleaned = cleaned[1:-1]
return [
item.strip(" `\"'")
for item in cleaned.split(",")
if item.strip(" `\"'")
][:8]
def _scope_summary_capability_name(self, chunks: list[ContentChunk]) -> str:
one_liner = self._scope_one_liner(chunks)
if one_liner:
return self._imperative_purpose(one_liner)
return ""
def _fact_derived_capabilities(
self,
*,
configs: list[ObservedFact],
manifests: list[ObservedFact],
frameworks: list[ObservedFact],
languages: list[ObservedFact],
docs: list[ObservedFact],
tests: list[ObservedFact],
chunks: list[ContentChunk],
) -> list[CandidateCapabilityDraft]:
if not configs:
return []
capability_facts = configs + manifests + frameworks + languages
if not capability_facts:
return []
features: list[CandidateFeatureDraft] = []
for label, kind, facts in (
("Manage Repository Configuration", "configuration", configs),
("Declare Runtime And Package Manifests", "manifest", manifests),
("Use Detected Frameworks", "framework", frameworks),
("Provide Implementation In Detected Languages", "implementation", languages),
):
if not facts:
continue
features.append(
CandidateFeatureDraft(
name=label,
type=kind,
location=self._grouped_location(facts),
confidence=0.45,
source_refs=self._source_refs(facts),
primary_class=kind,
attributes=[kind, "fact-derived", "review-required"],
)
)
if not features:
return []
name = self._fact_derived_capability_name(chunks, features)
return [
CandidateCapabilityDraft(
name=name,
description=(
"Reviewable capability inferred from deterministic facts. "
"This fills the hierarchy when no stronger intent, scope "
"capability, or interface candidate exists."
),
inputs=self._feature_inputs(features),
outputs=self._feature_outputs(features),
confidence=self._confidence(
0.35,
[
(0.10, bool(configs)),
(0.10, bool(manifests)),
(0.05, bool(frameworks)),
(0.05, bool(tests)),
(0.05, bool(docs)),
],
),
source_refs=self._source_refs(capability_facts),
primary_class="fact-derived",
attributes=["fact-derived", "review-required", "partial-hierarchy"],
features=features,
evidence=self._evidence(tests, [], docs),
)
]
def _fact_derived_capability_name(
self,
chunks: list[ContentChunk],
features: list[CandidateFeatureDraft],
) -> str:
scope_name = self._scope_summary_capability_name(chunks)
if scope_name:
return scope_name
if any(feature.type == "configuration" for feature in features):
return "Manage Repository Configuration"
if any(feature.type == "manifest" for feature in features):
return "Declare Repository Runtime"
return "Describe Repository Implementation"
def _repo_scoping_native_capabilities(
self,
repository: Repository,
@@ -1219,40 +1493,110 @@ class CandidateGraphGenerator:
ops_name = self._operations_ability_name(chunks)
if ops_name:
return ops_name
purpose_text = self._document_purpose_sentence(chunks) or repository.description
purpose_text = (
self._intent_purpose_sentence(chunks)
or self._scope_one_liner(chunks)
or self._documentation_purpose_sentence(chunks)
or repository.description
)
if purpose_text:
normalized = self._imperative_purpose(purpose_text)
if normalized:
return normalized
return f"Support {self._humanize_identifier(repository.name)}"
def _document_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
for chunk in self._purpose_chunks(chunks):
def _intent_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
return self._purpose_sentence_for_chunks(
[
chunk
for chunk in self._purpose_chunks(chunks)
if chunk.kind == "intent"
or chunk.metadata.get("source_role") == "intent_summary"
or chunk.path.lower().endswith("intent.md")
]
)
def _documentation_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
return self._purpose_sentence_for_chunks(
[
chunk
for chunk in self._purpose_chunks(chunks)
if chunk.kind == "documentation"
and chunk.metadata.get("source_role") != "derived_scope"
and not chunk.path.lower().endswith("scope.md")
]
)
def _purpose_sentence_for_chunks(self, chunks: list[ContentChunk]) -> str:
for chunk in chunks:
if chunk.kind not in {"intent", "documentation"}:
continue
lines = [line.strip() for line in chunk.text.splitlines() if line.strip()]
paragraph = next((line for line in lines if not line.startswith("#")), "")
if paragraph:
if paragraph and not self._is_template_boilerplate(paragraph):
return paragraph
return ""
def _scope_one_liner(self, chunks: list[ContentChunk]) -> str:
for chunk in sorted(chunks, key=lambda item: (item.path, item.start_line)):
if not (
chunk.kind == "scope"
or chunk.metadata.get("source_role") == "derived_scope"
or chunk.path.lower().endswith("scope.md")
):
continue
lines = chunk.text.splitlines()
for index, raw_line in enumerate(lines):
if raw_line.strip().lower() == "## one-liner":
for following in lines[index + 1 :]:
candidate = following.strip()
if not candidate or candidate.startswith("---"):
continue
if candidate.startswith(">"):
continue
return candidate.strip(" .")
before_first_section: list[str] = []
for raw_line in lines:
candidate = raw_line.strip()
if candidate.startswith("## "):
break
before_first_section.append(candidate)
for candidate in before_first_section:
if (
candidate
and not candidate.startswith("#")
and not candidate.startswith(">")
and not candidate.startswith("---")
and not self._is_template_boilerplate(candidate)
):
return candidate.strip(" .")
return ""
def _is_template_boilerplate(self, text: str) -> bool:
lowered = text.lower()
return (
"git repository template to bootstrap" in lowered
or "this file helps you quickly understand" in lowered
or "intentionally lightweight and may be incomplete" in lowered
)
def _purpose_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]:
def priority(chunk: ContentChunk) -> tuple[int, str, int]:
role = chunk.metadata.get("source_role")
path = chunk.path.lower()
if role == "intent_summary" or path.endswith("intent.md"):
return (0, path, chunk.start_line)
if role == "product_documentation" or path.startswith("readme"):
return (1, path, chunk.start_line)
if role == "derived_scope" or path.endswith("scope.md"):
return (3, path, chunk.start_line)
return (2, path, chunk.start_line)
return (1, path, chunk.start_line)
if role == "product_documentation" or path.startswith("readme"):
return (2, path, chunk.start_line)
return (3, path, chunk.start_line)
return sorted(
[
chunk
for chunk in chunks
if chunk.kind in {"intent", "documentation"}
if chunk.kind in {"intent", "documentation", "scope"}
and chunk.metadata.get("source_role") != "agent_guidance"
],
key=priority,
@@ -1284,9 +1628,11 @@ class CandidateGraphGenerator:
if not words:
return ""
words[0] = self._imperative_verb(words[0])
return self._title_from_words(words[:8])
return self._title_from_words(words[:10])
def _imperative_verb(self, word: str) -> str:
if word.isupper():
return word
lower = word.lower().strip(",;:")
irregular = {
"does": "do",
@@ -1313,7 +1659,7 @@ class CandidateGraphGenerator:
for word in words
]
return " ".join(
word[:1].upper() + word[1:]
word if word.isupper() else word[:1].upper() + word[1:]
for word in cleaned_words
if word
)
@@ -1341,17 +1687,37 @@ class CandidateGraphGenerator:
lines = [line.strip() for line in chunk.text.splitlines() if line.strip()]
if not lines:
continue
if chunk.kind == "scope" or chunk.metadata.get("source_role") == "derived_scope":
one_liner = self._scope_one_liner([chunk])
if one_liner:
return f"SCOPE. {one_liner}"
heading = next((line.lstrip("#").strip() for line in lines if line.startswith("#")), "")
paragraph = next((line for line in lines if not line.startswith("#")), "")
if self._is_template_boilerplate(paragraph):
paragraph = ""
if heading and paragraph:
return f"{heading}. {paragraph}"
return heading or paragraph
return ""
def _documentation_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]:
def priority(chunk: ContentChunk) -> tuple[int, str, int]:
role = chunk.metadata.get("source_role")
path = chunk.path.lower()
if chunk.kind == "intent" or role == "intent_summary" or path.endswith("intent.md"):
return (0, path, chunk.start_line)
if chunk.kind == "scope" or role == "derived_scope" or path.endswith("scope.md"):
return (1, path, chunk.start_line)
return (2, path, chunk.start_line)
return sorted(
[chunk for chunk in chunks if chunk.kind in {"intent", "documentation"}],
key=lambda chunk: (0 if chunk.kind == "intent" else 1, chunk.path, chunk.start_line),
[
chunk
for chunk in chunks
if chunk.kind in {"intent", "documentation", "scope"}
and chunk.metadata.get("source_role") != "agent_guidance"
],
key=priority,
)
def _interface_summary(self, chunks: list[ContentChunk]) -> str: