Implement scope-derived candidate review infrastructure

This commit is contained in:
2026-05-16 00:26:29 +02:00
parent f4d782c997
commit ba2228e889
14 changed files with 1740 additions and 39 deletions

View File

@@ -8,6 +8,7 @@ from repo_scoping.acceptance.criteria import (
load_quality_criteria,
)
from repo_scoping.core.models import (
CandidateAbility,
CandidateCapability,
CandidateFeature,
CandidateGraph,
@@ -39,11 +40,33 @@ def evaluate_candidate_graph_quality(
active_registry = registry or load_quality_criteria()
outcomes: list[QualityGateOutcome] = []
for ability in graph.abilities:
outcomes.extend(evaluate_candidate_ability_quality(ability, active_registry))
for capability in ability.capabilities:
outcomes.extend(evaluate_candidate_capability_quality(capability, active_registry))
return outcomes
def evaluate_candidate_ability_quality(
    ability: CandidateAbility,
    registry: QualityCriteriaRegistry | None = None,
) -> list[QualityGateOutcome]:
    """Run the ability-level quality gates for a single candidate ability.

    When ``registry`` is missing (or falsy) the criteria are loaded through
    ``load_quality_criteria()``. The only gate applied here is the
    template-boilerplate check, reported against criterion ``RREG-QC-007``.

    Returns an empty list when the ability passes, otherwise a list holding
    the single outcome built by ``_outcome``.
    """
    active_registry = registry or load_quality_criteria()
    criteria_by_id = {item.id: item for item in active_registry.criteria}
    if not _looks_template_contaminated(ability.name, ability.description):
        return []
    contamination = _outcome(
        active_registry,
        criteria_by_id["RREG-QC-007"],
        element_type="ability",
        element_id=ability.id,
        element_name=ability.name,
        reason="Candidate ability appears to be based on template boilerplate.",
    )
    return [contamination]
def evaluate_candidate_capability_quality(
capability: CandidateCapability,
registry: QualityCriteriaRegistry | None = None,
@@ -75,6 +98,17 @@ def evaluate_candidate_capability_quality(
reason="Candidate is supported only by generated SCOPE.md evidence.",
)
)
elif _has_scope_refs_or_attributes(refs, capability.attributes):
outcomes.append(
_outcome(
active_registry,
criteria["RREG-QC-008"],
element_type="capability",
element_id=capability.id,
element_name=capability.name,
reason="Candidate is scope-derived and must remain review-only until separated from intent.",
)
)
elif _all_weak_source_refs(refs):
outcomes.append(
_outcome(
@@ -97,6 +131,18 @@ def evaluate_candidate_capability_quality(
)
)
if _looks_template_contaminated(capability.name, capability.description):
outcomes.append(
_outcome(
active_registry,
criteria["RREG-QC-007"],
element_type="capability",
element_id=capability.id,
element_name=capability.name,
reason="Candidate capability appears to be based on template boilerplate.",
)
)
if _looks_like_provider_routing(capability):
outcomes.append(
_outcome(
@@ -197,6 +243,25 @@ def _all_generated_scope_refs(refs: list[SourceReference]) -> bool:
return bool(refs) and all(ref.path.endswith("SCOPE.md") for ref in refs)
def _has_scope_refs_or_attributes(
refs: list[SourceReference],
attributes: list[str],
) -> bool:
return any(ref.path.endswith("SCOPE.md") for ref in refs) or any(
attribute in {"scope-derived", "review-required-scope"}
for attribute in attributes
)
def _looks_template_contaminated(name: str, description: str) -> bool:
text = f"{name} {description}".lower()
return (
"repo-seed" in text
or "git repository template to bootstrap" in text
or "bootstrap coulomb projects" in text
)
def _all_weak_source_refs(refs: list[SourceReference]) -> bool:
return bool(refs) and all(_is_weak_source_ref(ref) for ref in refs)

View File

@@ -275,6 +275,8 @@ class CandidateGraphGenerator:
manifests = self._facts(facts, "manifest")
frameworks = self._facts(facts, "framework")
languages = self._facts(facts, "language")
configs = self._facts(facts, "config")
scope_facts = self._facts(facts, "scope")
llm_providers = self._facts(facts, "llm_provider")
credential_configs = self._facts(facts, "credential_config")
provider_registries = self._facts(facts, "provider_registry")
@@ -286,7 +288,7 @@ class CandidateGraphGenerator:
chunks,
)
ability_sources = docs or manifests or languages
ability_sources = docs or scope_facts or manifests or languages or configs
ability = CandidateAbilityDraft(
name=self._ability_name(repository, chunks),
description=self._ability_description(chunks),
@@ -308,6 +310,15 @@ class CandidateGraphGenerator:
capabilities.extend(
self._intent_capabilities(intent_facts, chunks, tests, examples, docs)
)
capabilities.extend(
self._scope_capabilities(
scope_facts,
chunks,
tests,
examples,
allow_summary_fallback=not intent_facts,
)
)
capabilities.extend(
self._repo_scoping_native_capabilities(
repository,
@@ -347,6 +358,18 @@ class CandidateGraphGenerator:
capabilities.append(
self._interface_capability(interfaces, tests, examples, docs, chunks)
)
if not capabilities:
capabilities.extend(
self._fact_derived_capabilities(
configs=configs,
manifests=manifests,
frameworks=frameworks,
languages=languages,
docs=docs,
tests=tests,
chunks=chunks,
)
)
return [
CandidateAbilityDraft(
@@ -582,6 +605,257 @@ class CandidateGraphGenerator:
words.pop()
return self._title_from_words(words[:10])
def _scope_capabilities(
    self,
    scope_facts: list[ObservedFact],
    chunks: list[ContentChunk],
    tests: list[ObservedFact],
    examples: list[ObservedFact],
    *,
    allow_summary_fallback: bool = True,
) -> list[CandidateCapabilityDraft]:
    """Build review-only capability drafts from SCOPE.md content.

    Scope chunks are those with kind ``scope``, a ``derived_scope`` source
    role, or a path ending in ``scope.md`` (case-insensitive). One draft is
    produced per parsed capability block, de-duplicated by lower-cased
    title. When no block yields a capability and ``allow_summary_fallback``
    is true, a single weaker draft named after the scope summary is
    returned instead.

    Returns an empty list when there are no scope chunks, or when the
    fallback is needed but no summary name can be derived.
    """
    scope_chunks = [
        chunk
        for chunk in chunks
        if chunk.kind == "scope"
        or chunk.metadata.get("source_role") == "derived_scope"
        or chunk.path.lower().endswith("scope.md")
    ]
    if not scope_chunks:
        return []
    # The same source references are attached to every draft, feature and
    # evidence item created below.
    source_refs = self._source_refs(scope_facts)
    capabilities: list[CandidateCapabilityDraft] = []
    seen: set[str] = set()
    for block in self._scope_capability_blocks(scope_chunks):
        title = block.get("title", "").strip()
        if not title:
            continue
        # De-duplicate case-insensitively; the first block with a title wins.
        key = title.lower()
        if key in seen:
            continue
        seen.add(key)
        # A missing or blank "type" falls back to the generic scope marker.
        capability_type = block.get("type", "scope-derived").strip() or "scope-derived"
        description = block.get("description", "").strip()
        keywords = self._scope_keywords(block.get("keywords", ""))
        attributes = self._unique(
            [
                capability_type,
                *keywords,
                "scope-derived",
                "current-state",
                "review-required-scope",
            ]
        )
        feature = CandidateFeatureDraft(
            name=title,
            type=capability_type,
            location="SCOPE.md",
            confidence=0.55,
            source_refs=source_refs,
            primary_class=capability_type,
            attributes=self._unique(
                [capability_type, "scope-defined", "review-required-scope"]
            ),
        )
        capabilities.append(
            CandidateCapabilityDraft(
                name=title,
                description=(
                    "Reviewable current-state capability extracted from "
                    f"SCOPE.md: {description or title}"
                ),
                inputs=[],
                outputs=[title],
                # Base 0.45, with weighted flags passed to self._confidence
                # for a description, keywords, tests and examples.
                confidence=self._confidence(
                    0.45,
                    [
                        (0.10, bool(description)),
                        (0.05, bool(keywords)),
                        (0.05, bool(tests)),
                        (0.05, bool(examples)),
                    ],
                ),
                source_refs=source_refs,
                primary_class=capability_type,
                attributes=attributes,
                features=[feature],
                evidence=[
                    CandidateEvidenceDraft(
                        type="scope-current-state",
                        reference="SCOPE.md",
                        strength="medium",
                        source_refs=source_refs,
                    )
                ],
            )
        )
    # Only fall back to the summary when no block produced a capability and
    # the caller allows it.
    if capabilities or not allow_summary_fallback:
        return capabilities
    fallback_name = self._scope_summary_capability_name(scope_chunks)
    if not fallback_name:
        return []
    return [
        CandidateCapabilityDraft(
            name=fallback_name,
            description=(
                "Reviewable current-state capability inferred from SCOPE.md "
                "summary text. A curator should split this into more precise "
                "capabilities when reviewing."
            ),
            inputs=[],
            outputs=[fallback_name],
            confidence=0.45,
            source_refs=source_refs,
            primary_class="scope-derived",
            attributes=[
                "scope-derived",
                "current-state",
                "review-required-scope",
            ],
            evidence=[
                CandidateEvidenceDraft(
                    type="scope-current-state",
                    reference="SCOPE.md",
                    strength="weak",
                    source_refs=source_refs,
                )
            ],
        )
    ]
def _scope_capability_blocks(
self,
chunks: list[ContentChunk],
) -> list[dict[str, str]]:
blocks: list[dict[str, str]] = []
in_block = False
current: dict[str, str] = {}
current_key = ""
for chunk in sorted(chunks, key=lambda item: (item.path, item.start_line)):
for raw_line in chunk.text.splitlines():
line = raw_line.rstrip()
stripped = line.strip()
if stripped.startswith("```capability"):
in_block = True
current = {}
current_key = ""
continue
if in_block and stripped.startswith("```"):
if current:
blocks.append(current)
in_block = False
current = {}
current_key = ""
continue
if not in_block:
continue
key, separator, value = stripped.partition(":")
if separator and re.match(r"^[A-Za-z_][A-Za-z0-9_-]*$", key):
current_key = key.lower()
current[current_key] = value.strip().strip('"')
elif current_key and stripped:
current[current_key] = (
f"{current[current_key]} {stripped.strip()}"
).strip()
return blocks
def _scope_keywords(self, value: str) -> list[str]:
cleaned = value.strip()
if cleaned.startswith("[") and cleaned.endswith("]"):
cleaned = cleaned[1:-1]
return [
item.strip(" `\"'")
for item in cleaned.split(",")
if item.strip(" `\"'")
][:8]
def _scope_summary_capability_name(self, chunks: list[ContentChunk]) -> str:
one_liner = self._scope_one_liner(chunks)
if one_liner:
return self._imperative_purpose(one_liner)
return ""
def _fact_derived_capabilities(
self,
*,
configs: list[ObservedFact],
manifests: list[ObservedFact],
frameworks: list[ObservedFact],
languages: list[ObservedFact],
docs: list[ObservedFact],
tests: list[ObservedFact],
chunks: list[ContentChunk],
) -> list[CandidateCapabilityDraft]:
if not configs:
return []
capability_facts = configs + manifests + frameworks + languages
if not capability_facts:
return []
features: list[CandidateFeatureDraft] = []
for label, kind, facts in (
("Manage Repository Configuration", "configuration", configs),
("Declare Runtime And Package Manifests", "manifest", manifests),
("Use Detected Frameworks", "framework", frameworks),
("Provide Implementation In Detected Languages", "implementation", languages),
):
if not facts:
continue
features.append(
CandidateFeatureDraft(
name=label,
type=kind,
location=self._grouped_location(facts),
confidence=0.45,
source_refs=self._source_refs(facts),
primary_class=kind,
attributes=[kind, "fact-derived", "review-required"],
)
)
if not features:
return []
name = self._fact_derived_capability_name(chunks, features)
return [
CandidateCapabilityDraft(
name=name,
description=(
"Reviewable capability inferred from deterministic facts. "
"This fills the hierarchy when no stronger intent, scope "
"capability, or interface candidate exists."
),
inputs=self._feature_inputs(features),
outputs=self._feature_outputs(features),
confidence=self._confidence(
0.35,
[
(0.10, bool(configs)),
(0.10, bool(manifests)),
(0.05, bool(frameworks)),
(0.05, bool(tests)),
(0.05, bool(docs)),
],
),
source_refs=self._source_refs(capability_facts),
primary_class="fact-derived",
attributes=["fact-derived", "review-required", "partial-hierarchy"],
features=features,
evidence=self._evidence(tests, [], docs),
)
]
def _fact_derived_capability_name(
self,
chunks: list[ContentChunk],
features: list[CandidateFeatureDraft],
) -> str:
scope_name = self._scope_summary_capability_name(chunks)
if scope_name:
return scope_name
if any(feature.type == "configuration" for feature in features):
return "Manage Repository Configuration"
if any(feature.type == "manifest" for feature in features):
return "Declare Repository Runtime"
return "Describe Repository Implementation"
def _repo_scoping_native_capabilities(
self,
repository: Repository,
@@ -1219,40 +1493,110 @@ class CandidateGraphGenerator:
ops_name = self._operations_ability_name(chunks)
if ops_name:
return ops_name
purpose_text = self._document_purpose_sentence(chunks) or repository.description
purpose_text = (
self._intent_purpose_sentence(chunks)
or self._scope_one_liner(chunks)
or self._documentation_purpose_sentence(chunks)
or repository.description
)
if purpose_text:
normalized = self._imperative_purpose(purpose_text)
if normalized:
return normalized
return f"Support {self._humanize_identifier(repository.name)}"
def _document_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
for chunk in self._purpose_chunks(chunks):
def _intent_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
return self._purpose_sentence_for_chunks(
[
chunk
for chunk in self._purpose_chunks(chunks)
if chunk.kind == "intent"
or chunk.metadata.get("source_role") == "intent_summary"
or chunk.path.lower().endswith("intent.md")
]
)
def _documentation_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
return self._purpose_sentence_for_chunks(
[
chunk
for chunk in self._purpose_chunks(chunks)
if chunk.kind == "documentation"
and chunk.metadata.get("source_role") != "derived_scope"
and not chunk.path.lower().endswith("scope.md")
]
)
def _purpose_sentence_for_chunks(self, chunks: list[ContentChunk]) -> str:
for chunk in chunks:
if chunk.kind not in {"intent", "documentation"}:
continue
lines = [line.strip() for line in chunk.text.splitlines() if line.strip()]
paragraph = next((line for line in lines if not line.startswith("#")), "")
if paragraph:
if paragraph and not self._is_template_boilerplate(paragraph):
return paragraph
return ""
def _scope_one_liner(self, chunks: list[ContentChunk]) -> str:
for chunk in sorted(chunks, key=lambda item: (item.path, item.start_line)):
if not (
chunk.kind == "scope"
or chunk.metadata.get("source_role") == "derived_scope"
or chunk.path.lower().endswith("scope.md")
):
continue
lines = chunk.text.splitlines()
for index, raw_line in enumerate(lines):
if raw_line.strip().lower() == "## one-liner":
for following in lines[index + 1 :]:
candidate = following.strip()
if not candidate or candidate.startswith("---"):
continue
if candidate.startswith(">"):
continue
return candidate.strip(" .")
before_first_section: list[str] = []
for raw_line in lines:
candidate = raw_line.strip()
if candidate.startswith("## "):
break
before_first_section.append(candidate)
for candidate in before_first_section:
if (
candidate
and not candidate.startswith("#")
and not candidate.startswith(">")
and not candidate.startswith("---")
and not self._is_template_boilerplate(candidate)
):
return candidate.strip(" .")
return ""
def _is_template_boilerplate(self, text: str) -> bool:
lowered = text.lower()
return (
"git repository template to bootstrap" in lowered
or "this file helps you quickly understand" in lowered
or "intentionally lightweight and may be incomplete" in lowered
)
def _purpose_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]:
def priority(chunk: ContentChunk) -> tuple[int, str, int]:
role = chunk.metadata.get("source_role")
path = chunk.path.lower()
if role == "intent_summary" or path.endswith("intent.md"):
return (0, path, chunk.start_line)
if role == "product_documentation" or path.startswith("readme"):
return (1, path, chunk.start_line)
if role == "derived_scope" or path.endswith("scope.md"):
return (3, path, chunk.start_line)
return (2, path, chunk.start_line)
return (1, path, chunk.start_line)
if role == "product_documentation" or path.startswith("readme"):
return (2, path, chunk.start_line)
return (3, path, chunk.start_line)
return sorted(
[
chunk
for chunk in chunks
if chunk.kind in {"intent", "documentation"}
if chunk.kind in {"intent", "documentation", "scope"}
and chunk.metadata.get("source_role") != "agent_guidance"
],
key=priority,
@@ -1284,9 +1628,11 @@ class CandidateGraphGenerator:
if not words:
return ""
words[0] = self._imperative_verb(words[0])
return self._title_from_words(words[:8])
return self._title_from_words(words[:10])
def _imperative_verb(self, word: str) -> str:
if word.isupper():
return word
lower = word.lower().strip(",;:")
irregular = {
"does": "do",
@@ -1313,7 +1659,7 @@ class CandidateGraphGenerator:
for word in words
]
return " ".join(
word[:1].upper() + word[1:]
word if word.isupper() else word[:1].upper() + word[1:]
for word in cleaned_words
if word
)
@@ -1341,17 +1687,37 @@ class CandidateGraphGenerator:
lines = [line.strip() for line in chunk.text.splitlines() if line.strip()]
if not lines:
continue
if chunk.kind == "scope" or chunk.metadata.get("source_role") == "derived_scope":
one_liner = self._scope_one_liner([chunk])
if one_liner:
return f"SCOPE. {one_liner}"
heading = next((line.lstrip("#").strip() for line in lines if line.startswith("#")), "")
paragraph = next((line for line in lines if not line.startswith("#")), "")
if self._is_template_boilerplate(paragraph):
paragraph = ""
if heading and paragraph:
return f"{heading}. {paragraph}"
return heading or paragraph
return ""
def _documentation_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]:
def priority(chunk: ContentChunk) -> tuple[int, str, int]:
role = chunk.metadata.get("source_role")
path = chunk.path.lower()
if chunk.kind == "intent" or role == "intent_summary" or path.endswith("intent.md"):
return (0, path, chunk.start_line)
if chunk.kind == "scope" or role == "derived_scope" or path.endswith("scope.md"):
return (1, path, chunk.start_line)
return (2, path, chunk.start_line)
return sorted(
[chunk for chunk in chunks if chunk.kind in {"intent", "documentation"}],
key=lambda chunk: (0 if chunk.kind == "intent" else 1, chunk.path, chunk.start_line),
[
chunk
for chunk in chunks
if chunk.kind in {"intent", "documentation", "scope"}
and chunk.metadata.get("source_role") != "agent_guidance"
],
key=priority,
)
def _interface_summary(self, chunks: list[ContentChunk]) -> str:

View File

@@ -186,6 +186,19 @@ def build_parser() -> argparse.ArgumentParser:
default="markdown",
help="Inventory output format.",
)
dataset = subparsers.add_parser(
"assess-dataset",
help="Summarize repository generation coverage across the local dataset.",
)
dataset.add_argument("--database-path", help="Override REPO_SCOPING_DATABASE_PATH.")
dataset.add_argument("--checkout-root", help="Override REPO_SCOPING_CHECKOUT_ROOT.")
dataset.add_argument("--output", help="Write dataset assessment to this path instead of stdout.")
dataset.add_argument(
"--format",
choices=["json", "markdown"],
default="markdown",
help="Dataset assessment output format.",
)
return parser
@@ -204,6 +217,8 @@ def main(argv: Sequence[str] | None = None) -> int:
return list_quality_criteria_command(args)
if args.command == "list-legacy-auto-approvals":
return list_legacy_auto_approvals_command(args)
if args.command == "assess-dataset":
return assess_dataset_command(args)
parser.error(f"unknown command: {args.command}")
return 2
@@ -285,6 +300,235 @@ def list_legacy_auto_approvals_command(args: argparse.Namespace) -> int:
return 0
def assess_dataset_command(args: argparse.Namespace) -> int:
    """Handle the ``assess-dataset`` CLI command.

    Builds the dataset assessment report, renders it as JSON or markdown
    according to ``args.format``, and either writes it to ``args.output``
    or prints it to stdout with exactly one trailing newline. Always
    returns exit code 0.
    """
    report = dataset_assessment(service_from_args(args))
    if args.format == "json":
        content = json.dumps(report, indent=2) + "\n"
    else:
        content = dataset_assessment_markdown(report)
    if args.output:
        write_text(args.output, content)
        return 0
    # Avoid doubling the newline when the content already ends with one.
    terminator = "" if content.endswith("\n") else "\n"
    print(content, end=terminator)
    return 0
def dataset_assessment(service: RegistryService) -> dict[str, object]:
    """Collect per-repository and aggregate generation coverage figures.

    For every repository returned by ``service.list_repositories()`` this
    gathers fact and chunk counts for the latest completed analysis run,
    candidate and approved hierarchy counts, dependency graph size,
    document presence in the run's snapshot, and a list of issue labels.

    Returns a dict with ``schema_version``, a ``summary`` of totals, and
    the per-repository entries under ``repositories``.
    """
    repositories = []
    totals = {
        "repositories": 0,
        "facts": 0,
        "content_chunks": 0,
        "candidate_abilities": 0,
        "candidate_capabilities": 0,
        "candidate_features": 0,
        "candidate_evidence": 0,
        "approved_abilities": 0,
        "approved_capabilities": 0,
        "approved_features": 0,
        "approved_evidence": 0,
        "dependency_graph_nodes": 0,
        "dependency_graph_edges": 0,
    }
    for repository in service.list_repositories():
        runs = service.list_analysis_runs(repository.id)
        # Walk the runs in reverse so the last completed one is selected.
        latest_run = next((run for run in reversed(runs) if run.status == "completed"), None)
        facts = service.list_observed_facts(repository.id, latest_run.id) if latest_run else []
        chunks = service.list_content_chunks(repository.id, latest_run.id) if latest_run else []
        candidate_counts = {
            "abilities": 0,
            "capabilities": 0,
            "features": 0,
            "evidence": 0,
        }
        candidate_names: list[str] = []
        if latest_run is not None:
            # A missing candidate graph is not an error; counts stay at zero.
            try:
                graph = service.candidate_graph(repository.id, latest_run.id)
            except NotFoundError:
                graph = None
            if graph is not None:
                candidate_counts = candidate_graph_counts(graph)
                # Keep only the first five ability names for the report.
                candidate_names = [
                    ability.name
                    for ability in graph.abilities
                ][:5]
        ability_map = service.ability_map(repository.id)
        approved_counts = approved_graph_counts(ability_map)
        graph_metrics = {"node_count": 0, "edge_count": 0}
        # Dependency graph metrics are best-effort: NotFoundError and
        # ValueError leave the zeroed defaults in place.
        try:
            dependency_graph = service.dependency_graph_elements(repository.id)
            graph_metrics = {
                "node_count": int(dependency_graph["metrics"]["node_count"]),
                "edge_count": int(dependency_graph["metrics"]["edge_count"]),
            }
        except (NotFoundError, ValueError):
            pass
        snapshot = (
            service.store.get_snapshot(latest_run.snapshot_id)
            if latest_run is not None and latest_run.snapshot_id is not None
            else None
        )
        # Without a snapshot every document is reported as absent.
        doc_presence = document_presence(snapshot.source_path if snapshot else "")
        issues = dataset_assessment_issues(
            fact_count=len(facts),
            chunk_count=len(chunks),
            candidate_counts=candidate_counts,
            approved_counts=approved_counts,
            graph_metrics=graph_metrics,
            doc_presence=doc_presence,
            candidate_names=candidate_names,
        )
        repositories.append(
            {
                "repository_id": repository.id,
                "name": repository.name,
                "status": repository.status,
                "latest_analysis_run_id": latest_run.id if latest_run else None,
                "latest_analysis_run_status": latest_run.status if latest_run else None,
                "facts": len(facts),
                "content_chunks": len(chunks),
                "candidate_counts": candidate_counts,
                "approved_counts": approved_counts,
                "dependency_graph": graph_metrics,
                "documents": doc_presence,
                "candidate_ability_names": candidate_names,
                "issues": issues,
            }
        )
        # Fold this repository into the dataset-wide totals.
        totals["repositories"] += 1
        totals["facts"] += len(facts)
        totals["content_chunks"] += len(chunks)
        totals["candidate_abilities"] += candidate_counts["abilities"]
        totals["candidate_capabilities"] += candidate_counts["capabilities"]
        totals["candidate_features"] += candidate_counts["features"]
        totals["candidate_evidence"] += candidate_counts["evidence"]
        totals["approved_abilities"] += approved_counts["abilities"]
        totals["approved_capabilities"] += approved_counts["capabilities"]
        totals["approved_features"] += approved_counts["features"]
        totals["approved_evidence"] += approved_counts["evidence"]
        totals["dependency_graph_nodes"] += graph_metrics["node_count"]
        totals["dependency_graph_edges"] += graph_metrics["edge_count"]
    return {
        "schema_version": "repo-scoping-dataset-assessment/v1",
        "summary": totals,
        "repositories": repositories,
    }
def candidate_graph_counts(graph) -> dict[str, int]:
    """Count abilities, capabilities, features and evidence in a candidate graph.

    Features and evidence are summed over every capability of every ability.
    """
    capability_total = 0
    feature_total = 0
    evidence_total = 0
    for ability in graph.abilities:
        for capability in ability.capabilities:
            capability_total += 1
            feature_total += len(capability.features)
            evidence_total += len(capability.evidence)
    return {
        "abilities": len(graph.abilities),
        "capabilities": capability_total,
        "features": feature_total,
        "evidence": evidence_total,
    }
def approved_graph_counts(ability_map) -> dict[str, int]:
    """Count the approved hierarchy held by an ability map.

    ``scope`` is 1 when the map has a truthy scope and 0 otherwise; the
    remaining counts are summed over every capability of every ability.
    """
    capability_total = 0
    feature_total = 0
    evidence_total = 0
    for ability in ability_map.abilities:
        for capability in ability.capabilities:
            capability_total += 1
            feature_total += len(capability.features)
            evidence_total += len(capability.evidence)
    scope_count = 1 if ability_map.scope else 0
    return {
        "scope": scope_count,
        "abilities": len(ability_map.abilities),
        "capabilities": capability_total,
        "features": feature_total,
        "evidence": evidence_total,
    }
def document_presence(source_path: str) -> dict[str, bool]:
    """Report which well-known documents exist at the root of a checkout.

    Args:
        source_path: Directory to inspect. An empty string means there is
            no checkout, in which case every document is reported absent.

    Returns:
        A dict keyed by ``INTENT.md``, ``SCOPE.md``, ``README``,
        ``CLAUDE.md`` and ``AGENTS.md`` (in that order). ``README`` is true
        when any regular file matching ``README*`` exists; directories such
        as ``README_assets/`` no longer count, matching the ``is_file()``
        checks used for the other documents.
    """
    if not source_path:
        return {
            "INTENT.md": False,
            "SCOPE.md": False,
            "README": False,
            "CLAUDE.md": False,
            "AGENTS.md": False,
        }
    root = Path(source_path)
    return {
        "INTENT.md": (root / "INTENT.md").is_file(),
        "SCOPE.md": (root / "SCOPE.md").is_file(),
        "README": any(path.is_file() for path in root.glob("README*")),
        "CLAUDE.md": (root / "CLAUDE.md").is_file(),
        "AGENTS.md": (root / "AGENTS.md").is_file(),
    }
def dataset_assessment_issues(
    *,
    fact_count: int,
    chunk_count: int,
    candidate_counts: dict[str, int],
    approved_counts: dict[str, int],
    graph_metrics: dict[str, int],
    doc_presence: dict[str, bool],
    candidate_names: list[str],
) -> list[str]:
    """Derive issue labels for one repository from its assessment figures.

    Each rule is a (triggered, label) pair; labels are returned in rule
    order for every rule that triggered.
    """
    rules = (
        (
            fact_count and not candidate_counts["capabilities"],
            "facts-without-candidate-capabilities",
        ),
        (
            chunk_count
            and doc_presence.get("SCOPE.md")
            and not candidate_counts["capabilities"],
            "scope-text-unused-for-lower-hierarchy",
        ),
        (
            fact_count and not graph_metrics["node_count"],
            "facts-with-empty-dependency-graph",
        ),
        (
            approved_counts["abilities"] == 0 and graph_metrics["node_count"] == 0,
            "approved-hierarchy-missing-and-no-graph-fallback",
        ),
        (
            any("repo-seed" in name.lower() for name in candidate_names),
            "template-readme-contamination",
        ),
    )
    return [label for triggered, label in rules if triggered]
def dataset_assessment_markdown(report: dict[str, object]) -> str:
    """Render a dataset assessment report as a markdown document.

    The output has a title, a bullet summary of dataset totals, and one
    table row per repository. Hierarchy columns are written as
    ``abilities/capabilities/features/evidence`` and the graph column as
    ``nodes/edges``. A falsy run id or empty issue list is shown as ``-``.
    """

    def hierarchy(counts) -> str:
        return (
            f"{counts['abilities']}/{counts['capabilities']}/"
            f"{counts['features']}/{counts['evidence']}"
        )

    summary = report["summary"]
    candidate_line = (
        f"- Candidate hierarchy: {summary['candidate_abilities']} abilities / "
        f"{summary['candidate_capabilities']} capabilities / "
        f"{summary['candidate_features']} features / "
        f"{summary['candidate_evidence']} evidence"
    )
    approved_line = (
        f"- Approved hierarchy: {summary['approved_abilities']} abilities / "
        f"{summary['approved_capabilities']} capabilities / "
        f"{summary['approved_features']} features / "
        f"{summary['approved_evidence']} evidence"
    )
    graph_line = (
        f"- Dependency graph: {summary['dependency_graph_nodes']} nodes / "
        f"{summary['dependency_graph_edges']} edges"
    )
    lines = [
        "# Repo-Scoping Dataset Assessment",
        "",
        f"- Repositories: {summary['repositories']}",
        f"- Facts: {summary['facts']}",
        candidate_line,
        approved_line,
        graph_line,
        "",
        "| Repo | Run | Facts | Chunks | Candidate | Approved | Graph | Issues |",
        "| --- | ---: | ---: | ---: | --- | --- | --- | --- |",
    ]
    for item in report["repositories"]:
        graph = item["dependency_graph"]
        cells = [
            f"{item['name']}",
            f"{item['latest_analysis_run_id'] or '-'}",
            f"{item['facts']}",
            f"{item['content_chunks']}",
            hierarchy(item["candidate_counts"]),
            hierarchy(item["approved_counts"]),
            f"{graph['node_count']}/{graph['edge_count']}",
            f"{', '.join(item['issues']) or '-'}",
        ]
        lines.append("| " + " | ".join(cells) + " |")
    return "\n".join(lines) + "\n"
def legacy_auto_approval_records_markdown(records) -> str:
if not records:
return "No legacy trusted auto-approval records found.\n"

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
from collections.abc import Sequence
from dataclasses import asdict, replace
from pathlib import Path
from typing import Any
from repo_scoping.acceptance import (
@@ -1492,6 +1493,22 @@ class RegistryService:
ability_map = self.store.get_ability_map(repository_id)
facts_by_id = {fact.id: fact for fact in self.store.list_observed_facts(repository_id)}
characteristic_index = self._dependency_characteristic_index(ability_map)
latest_candidate_graph = None
include_candidates = not ability_map.abilities
if include_candidates:
latest_run = self._latest_completed_run(repository_id)
latest_candidate_graph = (
self._candidate_graph_or_none(repository_id, latest_run.id)
if latest_run is not None
else None
)
if latest_candidate_graph is not None:
characteristic_index.update(
self._candidate_dependency_characteristic_index(
latest_candidate_graph,
ability_map,
)
)
nodes: dict[str, dict[str, object]] = {}
edge_sources: dict[str, DependencyEdge] = {}
@@ -1513,6 +1530,16 @@ class RegistryService:
if (display_edge := self._dependency_display_edge(edge, facts_by_id))
is not None
]
if latest_candidate_graph is not None:
graph_edges.extend(
display_edge
for edge in self._candidate_dependency_edges(
latest_candidate_graph,
ability_map,
)
if (display_edge := self._dependency_display_edge(edge, facts_by_id))
is not None
)
def ensure_node(kind: str, key: str, item_id: int | None) -> None:
if key in nodes:
@@ -1555,7 +1582,7 @@ class RegistryService:
"layer": self._dependency_layer(kind),
"label": detail.get("label")
or self._dependency_node_label(repository_id, kind, key, item_id),
"reviewState": "accepted",
"reviewState": detail.get("reviewState", "accepted"),
"name": detail.get("name")
or self._dependency_node_label(repository_id, kind, key, item_id),
"description": detail.get("description", ""),
@@ -1563,7 +1590,7 @@ class RegistryService:
"attributes": detail.get("attributes", []),
"confidence": detail.get("confidence"),
"visualSize": self._dependency_node_size(detail.get("confidence")),
"ownership": self._ownership_for_kind(kind),
"ownership": detail.get("ownership", self._ownership_for_kind(kind)),
"freshnessState": (
impact_item.freshness_state
if impact_item is not None
@@ -1587,6 +1614,7 @@ class RegistryService:
class_name
for class_name in (
kind,
str(detail.get("reviewState", "accepted")),
"stale" if impact_item is not None else "current",
"changed" if is_changed_fact else "",
)
@@ -2627,6 +2655,336 @@ class RegistryService:
)
return self.store.get_ability_map(repository_id)
def document_review(
    self,
    repository_id: int,
    document_name: str,
) -> dict[str, object]:
    """Assemble a review payload for INTENT.md or SCOPE.md.

    The payload pairs the document's current on-disk content (read from
    the latest completed run's snapshot, when there is one) with a freshly
    generated draft. Nothing is written to disk.

    Args:
        repository_id: Repository whose document is reviewed.
        document_name: ``INTENT.md`` or ``SCOPE.md`` (case-insensitive).

    Returns:
        A dict with the repository, document name and path, an ``exists``
        flag, current and draft content, draft kind, write policy, and
        provenance.

    Raises:
        ValueError: If ``document_name`` is not one of the two documents.
    """
    normalized = document_name.upper()
    if normalized not in {"INTENT.MD", "SCOPE.MD"}:
        raise ValueError("document_name must be INTENT.md or SCOPE.md")
    is_intent = normalized == "INTENT.MD"
    repository = self.store.get_repository(repository_id)
    latest_run = self._latest_completed_run(repository_id)
    facts = (
        self.store.list_observed_facts(repository_id, latest_run.id)
        if latest_run is not None
        else []
    )
    chunks = (
        self.store.list_content_chunks(repository_id, latest_run.id)
        if latest_run is not None
        else []
    )
    ability_map = self.store.get_ability_map(repository_id)
    candidate_graph = (
        self._candidate_graph_or_none(repository_id, latest_run.id)
        if latest_run is not None
        else None
    )
    snapshot = (
        self.store.get_snapshot(latest_run.snapshot_id)
        if latest_run is not None and latest_run.snapshot_id is not None
        else None
    )
    source_root = Path(snapshot.source_path) if snapshot is not None else None
    filename = "INTENT.md" if is_intent else "SCOPE.md"
    current_path = source_root / filename if source_root is not None else None
    # Existence is a property of the file, not of its content: an empty
    # document on disk still exists.
    exists = current_path is not None and current_path.is_file()
    current_content = (
        current_path.read_text(encoding="utf-8", errors="ignore") if exists else ""
    )
    draft_content = (
        self._draft_intent_document(repository, ability_map, candidate_graph, facts, chunks)
        if is_intent
        else self._draft_scope_document(repository, ability_map, candidate_graph, facts, chunks)
    )
    return {
        "repository": asdict(repository),
        "document": filename,
        "path": str(current_path) if current_path is not None else "",
        "exists": exists,
        "current_content": current_content,
        "draft_content": draft_content,
        "draft_kind": "ambitious-intent" if is_intent else "current-scope",
        "write_policy": (
            "review-only; repo-scoping does not write INTENT.md automatically"
            if is_intent
            else "review-only from this endpoint; use the explicit scope write endpoint to write"
        ),
        "provenance": self._document_review_provenance(
            latest_run.id if latest_run is not None else None,
            facts,
            chunks,
            candidate_graph,
        ),
    }
def _latest_completed_run(self, repository_id: int) -> AnalysisRun | None:
completed = [
run
for run in self.store.list_analysis_runs(repository_id)
if run.status == "completed"
]
return completed[-1] if completed else None
def _candidate_graph_or_none(
self,
repository_id: int,
analysis_run_id: int,
) -> CandidateGraph | None:
try:
return self.store.get_candidate_graph(repository_id, analysis_run_id)
except NotFoundError:
return None
def _draft_intent_document(
self,
repository: Repository,
ability_map: RepositoryAbilityMap,
candidate_graph: CandidateGraph | None,
facts: list[ObservedFact],
chunks: list[ContentChunk],
) -> str:
one_liner = (
self._scope_one_liner_from_chunks(chunks)
or ability_map.scope.description
or repository.description
or f"{repository.name} should provide clearly reviewable repository utility."
)
capabilities = self._draft_capability_names(ability_map, candidate_graph)
boundaries = self._scope_section_items(chunks, "Not Relevant When")
related = self._scope_section_items(chunks, "Related / Overlapping")
lines = [
"# INTENT",
"",
"> Draft generated by repo-scoping for review.",
"> This is ambitious design intent derived from current scope, facts, and candidates.",
"> It is not written automatically.",
"",
"## Purpose",
"",
self._ambitious_intent_sentence(one_liner),
"",
"## Intended Capabilities",
"",
]
if capabilities:
lines.extend(f"- {name}" for name in capabilities)
else:
lines.append("- <!-- needs curator input -->")
lines.extend(["", "## Success Criteria", ""])
lines.extend(
[
"- The repository's useful behavior can be explained from source-linked evidence.",
"- Candidate capabilities can be reviewed without relying on template boilerplate.",
"- Scope and intent remain separate: current behavior informs but does not define ambition.",
]
)
lines.extend(["", "## Boundaries", ""])
if boundaries:
lines.extend(boundaries)
else:
lines.append("- <!-- needs curator input -->")
if related:
lines.extend(["", "## Related Repositories", ""])
lines.extend(related)
return "\n".join(lines).rstrip() + "\n"
def _draft_scope_document(
    self,
    repository: Repository,
    ability_map: RepositoryAbilityMap,
    candidate_graph: CandidateGraph | None,
    facts: list[ObservedFact],
    chunks: list[ContentChunk],
) -> str:
    """Render a draft SCOPE document as Markdown text.

    Sections with no extracted content receive a
    ``<!-- needs curator input -->`` placeholder so the curator can see
    what still has to be filled in.

    Args:
        repository: Repository the draft is written for; its name,
            description and status are read.
        ability_map: Approved ability map; supplies the scope description and
            capability names.
        candidate_graph: Candidate graph used for capability names when the
            ability map has none; may be ``None``.
        facts: Observed facts; counted, and their paths listed for orientation.
        chunks: Content chunks searched for existing scope sections.

    Returns:
        The Markdown document, ending with exactly one trailing newline.
    """
    bullet_placeholder = "- <!-- needs curator input -->"
    summary = (
        self._scope_one_liner_from_chunks(chunks)
        or ability_map.scope.description
        or repository.description
        or f"{repository.name} has observed repository behavior under review."
    )
    capability_names = self._draft_capability_names(ability_map, candidate_graph)
    relevant_items = self._scope_section_items(chunks, "Relevant When")
    excluded_items = self._scope_section_items(chunks, "Not Relevant When")
    # At most eight distinct, non-empty fact paths in sorted order.
    distinct_paths = {fact.path for fact in facts if fact.path}
    orientation_paths = sorted(distinct_paths)[:8]
    core_idea = self._scope_core_idea_from_chunks(chunks) or summary

    document = [
        "# SCOPE",
        "",
        "> Draft generated by repo-scoping for review.",
        "> This describes current understood behavior and should be edited before writing.",
        "",
        "---",
        "",
        "## One-liner",
        "",
        summary,
        "",
        "## Core Idea",
        "",
        core_idea,
        "",
        "## Relevant When",
        "",
    ]
    document += relevant_items or [bullet_placeholder]
    document += ["", "## Not Relevant When", ""]
    document += excluded_items or [bullet_placeholder]
    document += [
        "",
        "## Current State",
        "",
        f"- Repository status: {repository.status}",
        f"- Facts observed: {len(facts)}",
        f"- Candidate capabilities: {len(capability_names)}",
        "",
        "## Getting Oriented",
        "",
    ]
    if orientation_paths:
        document.append(f"- Start with: {orientation_paths[0]}")
        document.append(f"- Key files / directories: {', '.join(orientation_paths)}")
    else:
        document.append(bullet_placeholder)

    document += ["", "## Provided Capabilities", ""]
    if not capability_names:
        document.append("<!-- needs curator input -->")
    for name in capability_names:
        # One fenced ``capability`` block per name, each followed by a blank line.
        document += [
            "```capability",
            "type: draft",
            f"title: {name}",
            "description: Review this candidate capability before treating it as scope truth.",
            "keywords: [draft, review-required]",
            "```",
            "",
        ]
    return "\n".join(document).rstrip() + "\n"
def _document_review_provenance(
    self,
    analysis_run_id: int | None,
    facts: list[ObservedFact],
    chunks: list[ContentChunk],
    candidate_graph: CandidateGraph | None,
) -> dict[str, object]:
    """Summarize the inputs a document draft was derived from.

    Args:
        analysis_run_id: Run the inputs came from, or ``None``.
        facts: Observed facts; counted, and their paths sampled.
        chunks: Content chunks; only counted.
        candidate_graph: Candidate graph to count elements of; ``None``
            yields all-zero counts.

    Returns:
        A dict with ``analysis_run_id``, ``fact_count``,
        ``content_chunk_count``, ``candidate_counts`` (abilities,
        capabilities, features, evidence) and ``source_paths`` (up to twelve
        distinct non-empty fact paths, sorted).
    """
    if candidate_graph is None:
        candidate_counts = {
            "abilities": 0,
            "capabilities": 0,
            "features": 0,
            "evidence": 0,
        }
    else:
        abilities = candidate_graph.abilities
        capabilities = [
            capability
            for ability in abilities
            for capability in ability.capabilities
        ]
        candidate_counts = {
            "abilities": len(abilities),
            "capabilities": len(capabilities),
            "features": sum(len(capability.features) for capability in capabilities),
            "evidence": sum(len(capability.evidence) for capability in capabilities),
        }

    distinct_paths = {fact.path for fact in facts if fact.path}
    return {
        "analysis_run_id": analysis_run_id,
        "fact_count": len(facts),
        "content_chunk_count": len(chunks),
        "candidate_counts": candidate_counts,
        "source_paths": sorted(distinct_paths)[:12],
    }
def _draft_capability_names(
    self,
    ability_map: RepositoryAbilityMap,
    candidate_graph: CandidateGraph | None,
) -> list[str]:
    """Pick the capability names to list in a draft document.

    Preference order:
      1. capability names from the ability map (first twelve);
      2. names of candidate-graph capabilities whose status is
         ``"candidate"`` (first twelve);
      3. names of the first three candidate abilities.

    Args:
        ability_map: Ability map whose capabilities take priority.
        candidate_graph: Fallback source of names; may be ``None``.

    Returns:
        The selected names; empty when the ability map has no capabilities
        and there is no candidate graph.
    """
    approved_names: list[str] = []
    for ability in ability_map.abilities:
        approved_names.extend(capability.name for capability in ability.capabilities)
    if approved_names:
        return approved_names[:12]

    if candidate_graph is None:
        return []

    candidate_names: list[str] = []
    for ability in candidate_graph.abilities:
        for capability in ability.capabilities:
            if capability.status == "candidate":
                candidate_names.append(capability.name)
    if candidate_names:
        return candidate_names[:12]

    # No usable capabilities: fall back to a few ability names.
    return [ability.name for ability in candidate_graph.abilities[:3]]
def _scope_one_liner_from_chunks(self, chunks: list[ContentChunk]) -> str:
    """Extract the text of the ``## One-liner`` section from scope chunks.

    Chunks are visited in the order produced by ``self._scope_chunks``. Within
    a chunk, the first non-empty line after a ``## One-liner`` heading is
    returned, skipping horizontal rules (``---``) and block quotes (``>``).

    The search stops at the next heading, so an empty one-liner section no
    longer returns the following heading (for example ``## Core Idea``) as the
    one-liner; the scan then continues with later headings and chunks.

    Args:
        chunks: Content chunks to search.

    Returns:
        The one-liner with surrounding spaces and periods removed, or ``""``
        when no chunk provides one.
    """
    for chunk in self._scope_chunks(chunks):
        lines = chunk.text.splitlines()
        for index, raw_line in enumerate(lines):
            if raw_line.strip().lower() != "## one-liner":
                continue
            for following in lines[index + 1 :]:
                candidate = following.strip()
                if candidate.startswith("#"):
                    # Reached the next heading: this one-liner section is empty.
                    break
                if candidate and not candidate.startswith(("---", ">")):
                    return candidate.strip(" .")
    return ""
def _scope_core_idea_from_chunks(self, chunks: list[ContentChunk]) -> str:
    """Return the ``Core Idea`` section lines joined with newlines.

    Args:
        chunks: Content chunks to search.

    Returns:
        The joined section lines, or ``""`` when the section has no items.
    """
    core_lines = self._scope_section_items(chunks, "Core Idea")
    if not core_lines:
        return ""
    return "\n".join(core_lines)
def _scope_section_items(
    self,
    chunks: list[ContentChunk],
    section_name: str,
) -> list[str]:
    """Collect the content lines of one ``## <section_name>`` section.

    Chunks are visited in the order produced by ``self._scope_chunks``. A
    section starts at a matching ``## `` heading (case-insensitive) and ends
    at the next ``## `` heading. The "inside the section" state is carried
    across consecutive chunks of the same path, so a section split over
    several chunks is still read in full. It is reset when the path changes,
    so the preamble of one file is not attributed to a section left open at
    the end of the previous file.

    Blank lines, horizontal rules (``---``), code-fence markers and any other
    line starting with ``#`` are skipped; every remaining line is kept
    verbatim after stripping surrounding whitespace.

    Args:
        chunks: Content chunks to search.
        section_name: Heading text to look for, without the ``## `` prefix.

    Returns:
        At most ten collected lines, in encounter order.
    """
    wanted = section_name.lower()
    items: list[str] = []
    in_section = False
    current_path: object = object()  # sentinel that equals no real path
    for chunk in self._scope_chunks(chunks):
        if chunk.path != current_path:
            # New file: a section never continues across file boundaries.
            current_path = chunk.path
            in_section = False
        for raw_line in chunk.text.splitlines():
            line = raw_line.strip()
            if line.startswith("## "):
                in_section = line.lstrip("#").strip().lower() == wanted
                continue
            if not in_section or not line:
                continue
            if line.startswith(("---", "```", "#")):
                continue
            items.append(line)
    return items[:10]
def _scope_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]:
    """Select the chunks that belong to scope documents, in reading order.

    A chunk qualifies when its ``kind`` is ``"scope"``, its metadata marks it
    with ``source_role == "derived_scope"``, or its path ends with
    ``scope.md`` (case-insensitive).

    Args:
        chunks: Content chunks to filter.

    Returns:
        A new list of the qualifying chunks sorted by ``(path, start_line)``.
    """

    def is_scope_chunk(chunk) -> bool:
        if chunk.kind == "scope":
            return True
        if chunk.metadata.get("source_role") == "derived_scope":
            return True
        return chunk.path.lower().endswith("scope.md")

    selected = [chunk for chunk in chunks if is_scope_chunk(chunk)]
    selected.sort(key=lambda chunk: (chunk.path, chunk.start_line))
    return selected
def _ambitious_intent_sentence(self, current_scope: str) -> str:
    """Turn a current-scope summary into an intent purpose sentence.

    Args:
        current_scope: Summary of current behavior; surrounding whitespace
            and trailing periods are removed before use.

    Returns:
        A sentence embedding the cleaned summary, or a generic sentence when
        nothing remains after cleaning.
    """
    subject = current_scope.strip().rstrip(".")
    if subject:
        return f"Provide a deliberate, reviewable implementation of: {subject}."
    return "Provide a source-linked, reviewable repository capability."
def ability_map(self, repository_id: int) -> RepositoryAbilityMap:
    """Return the ability map the store holds for ``repository_id``.

    Exceptions raised by the store propagate unchanged.
    """
    stored_map = self.store.get_ability_map(repository_id)
    return stored_map
@@ -2965,6 +3323,74 @@ class RegistryService:
"sourceReferences": [
asdict(source_ref) for source_ref in evidence.source_refs
],
}
return index
def _candidate_dependency_characteristic_index(
    self,
    candidate_graph: CandidateGraph,
    ability_map: RepositoryAbilityMap,
) -> dict[str, dict[str, object]]:
    """Build display characteristics for every node of the candidate graph.

    The result maps dependency keys (from ``self._candidate_dependency_key``)
    to a dict of ``name``, ``description``, ``primaryClass``, ``attributes``,
    ``confidence``, ``reviewState``, ``ownership`` and ``sourceReferences``.
    Feature entries additionally carry ``path``.

    Args:
        candidate_graph: Graph whose abilities, capabilities, features and
            evidence are indexed.
        ability_map: Supplies the scope node, which is always present and
            marked as a curator-owned draft.

    Returns:
        The index, with the scope entry first and the remaining entries in
        graph traversal order.
    """
    key_for = self._candidate_dependency_key
    scope = ability_map.scope
    characteristics: dict[str, dict[str, object]] = {}

    characteristics[key_for("scope", scope.id)] = {
        "name": scope.name,
        "description": scope.description,
        "primaryClass": "draft-scope",
        "attributes": ["draft", "scope", "candidate-derived"],
        "confidence": scope.confidence,
        "reviewState": "draft",
        "ownership": "curator_owned",
        "sourceReferences": [],
    }

    for ability in candidate_graph.abilities:
        ability_refs = [asdict(ref) for ref in ability.source_refs]
        characteristics[key_for("ability", ability.id)] = {
            "name": ability.name,
            "description": ability.description,
            "primaryClass": ability.primary_class,
            "attributes": ability.attributes,
            "confidence": ability.confidence,
            "reviewState": ability.status,
            "ownership": "mixed",
            "sourceReferences": ability_refs,
        }

        for capability in ability.capabilities:
            capability_refs = [asdict(ref) for ref in capability.source_refs]
            characteristics[key_for("capability", capability.id)] = {
                "name": capability.name,
                "description": capability.description,
                "primaryClass": capability.primary_class,
                "attributes": capability.attributes,
                "confidence": capability.confidence,
                "reviewState": capability.status,
                "ownership": "mixed",
                "sourceReferences": capability_refs,
            }

            for feature in capability.features:
                feature_refs = [asdict(ref) for ref in feature.source_refs]
                characteristics[key_for("feature", feature.id)] = {
                    "name": feature.name,
                    # Features expose their location as the description too.
                    "description": feature.location,
                    "primaryClass": feature.primary_class or feature.type,
                    "attributes": feature.attributes,
                    "confidence": feature.confidence,
                    "path": feature.location,
                    "reviewState": feature.status,
                    "ownership": "mixed",
                    "sourceReferences": feature_refs,
                }

            for evidence in capability.evidence:
                evidence_refs = [asdict(ref) for ref in evidence.source_refs]
                characteristics[key_for("evidence", evidence.id)] = {
                    "name": evidence.reference,
                    "description": evidence.type,
                    "primaryClass": evidence.type,
                    "attributes": [evidence.type, evidence.strength],
                    "confidence": self._evidence_confidence(evidence.strength),
                    "reviewState": evidence.status,
                    "ownership": "mixed",
                    "sourceReferences": evidence_refs,
                }
    return characteristics
@@ -3223,6 +3649,134 @@ class RegistryService:
)
return edges
def _candidate_dependency_edges(
    self,
    candidate_graph: CandidateGraph,
    ability_map: RepositoryAbilityMap,
) -> list[DependencyEdge]:
    """Build the dependency edges of the draft (candidate) graph.

    Structural edges point from child to parent: ability -> scope
    (``draft-summarizes``), capability -> ability (``draft-realizes``),
    feature -> capability and evidence -> capability (``draft-supports``).
    Each node's source references additionally yield ``observes-draft``
    edges from the referenced fact to that node.

    All edges use strength ``"medium"`` except those involving evidence,
    which use the evidence's own strength when it is non-empty.

    Args:
        candidate_graph: Graph to traverse.
        ability_map: Supplies the scope node that abilities attach to.

    Returns:
        Edges in traversal order; for every node its structural edge comes
        first, followed by its fact edges.
    """
    build_edge = self._dependency_edge
    key_for = self._candidate_dependency_key
    scope_id = ability_map.scope.id
    scope_key = key_for("scope", scope_id)
    collected: list[DependencyEdge] = []

    def link(child, parent, relation, strength="medium"):
        """Append a structural edge; ``child``/``parent`` are (kind, id, key)."""
        child_kind, child_id, child_key = child
        parent_kind, parent_id, parent_key = parent
        collected.append(
            build_edge(
                source_kind=child_kind,
                source_id=child_id,
                source_key=child_key,
                target_kind=parent_kind,
                target_id=parent_id,
                target_key=parent_key,
                dependency_type=relation,
                strength=strength,
                source="candidate_graph",
            )
        )

    def observe(source_refs, node, strength="medium"):
        """Append one fact -> node edge per source reference."""
        node_kind, node_id, node_key = node
        for source_ref in source_refs:
            collected.append(
                build_edge(
                    source_kind="fact",
                    source_id=source_ref.fact_id,
                    source_key=self._source_ref_fact_key(source_ref),
                    target_kind=node_kind,
                    target_id=node_id,
                    target_key=node_key,
                    dependency_type="observes-draft",
                    strength=strength,
                    source="candidate_source_ref",
                )
            )

    scope_node = ("scope", scope_id, scope_key)
    for ability in candidate_graph.abilities:
        ability_node = ("ability", ability.id, key_for("ability", ability.id))
        link(ability_node, scope_node, "draft-summarizes")
        observe(ability.source_refs, ability_node)

        for capability in ability.capabilities:
            capability_node = (
                "capability",
                capability.id,
                key_for("capability", capability.id),
            )
            link(capability_node, ability_node, "draft-realizes")
            observe(capability.source_refs, capability_node)

            for feature in capability.features:
                feature_node = ("feature", feature.id, key_for("feature", feature.id))
                link(feature_node, capability_node, "draft-supports")
                observe(feature.source_refs, feature_node)

            for evidence in capability.evidence:
                evidence_node = ("evidence", evidence.id, key_for("evidence", evidence.id))
                # Evidence carries its own strength; empty values fall back to medium.
                evidence_strength = evidence.strength or "medium"
                link(evidence_node, capability_node, "draft-supports", evidence_strength)
                observe(evidence.source_refs, evidence_node, evidence_strength)
    return collected
def _dependency_edge(
self,
*,
@@ -3253,6 +3807,9 @@ class RegistryService:
def _dependency_key(self, kind: str, item_id: int) -> str:
    """Return the graph key ``"<kind>:<item_id>"`` for an approved element."""
    return "{}:{}".format(kind, item_id)
def _candidate_dependency_key(self, kind: str, item_id: int) -> str:
    """Return the graph key for a draft element.

    Scope nodes are keyed ``"draft:scope:<item_id>"``; every other kind is
    keyed ``"candidate:<kind>:<item_id>"``.
    """
    if kind == "scope":
        return f"draft:scope:{item_id}"
    return f"candidate:{kind}:{item_id}"
def _source_ref_fact_key(self, source_ref) -> str:
    """Return the fact key ``"fact:<kind>:<path>:<name>"`` for a source reference.

    Args:
        source_ref: Object exposing ``kind``, ``path`` and ``name`` attributes.
    """
    return "fact:{}:{}:{}".format(source_ref.kind, source_ref.path, source_ref.name)

View File

@@ -1212,6 +1212,38 @@ def get_ability_map(
raise HTTPException(status_code=404, detail=str(exc)) from exc
@app.get(
    "/repos/{repository_id}/intent/review",
    tags=["scope"],
)
def review_repository_intent(
    repository_id: int,
    service: RegistryService = Depends(get_service),
) -> dict[str, object]:
    """Return the INTENT.md review payload for a repository.

    Raises:
        HTTPException: 404 when the service raises ``NotFoundError``,
            400 when it raises ``ValueError``.
    """
    try:
        review = service.document_review(repository_id, "INTENT.md")
    except NotFoundError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return review
@app.get(
    "/repos/{repository_id}/scope/review",
    tags=["scope"],
)
def review_repository_scope(
    repository_id: int,
    service: RegistryService = Depends(get_service),
) -> dict[str, object]:
    """Return the SCOPE.md review payload for a repository.

    Raises:
        HTTPException: 404 when the service raises ``NotFoundError``,
            400 when it raises ``ValueError``.
    """
    try:
        review = service.document_review(repository_id, "SCOPE.md")
    except NotFoundError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return review
@app.get(
"/repos/{repository_id}/dependency-graph",
tags=["visualization"],

View File

@@ -1104,6 +1104,61 @@ def repository_scope_document(
)
@router.get("/ui/repos/{repository_id}/intent-review")
def repository_intent_review(
    repository_id: int,
    service: RegistryService = Depends(get_service),
) -> HTMLResponse:
    """Serve the HTML review page for the repository's INTENT.md draft."""
    return repository_document_review_page(
        repository_id=repository_id,
        document_name="INTENT.md",
        service=service,
    )
@router.get("/ui/repos/{repository_id}/scope-review")
def repository_scope_review(
    repository_id: int,
    service: RegistryService = Depends(get_service),
) -> HTMLResponse:
    """Serve the HTML review page for the repository's SCOPE.md draft."""
    return repository_document_review_page(
        repository_id=repository_id,
        document_name="SCOPE.md",
        service=service,
    )
def repository_document_review_page(
    repository_id: int,
    document_name: str,
    service: RegistryService,
) -> HTMLResponse:
    """Render the side-by-side review page for one repository document.

    The page shows the write policy, whether the document exists and where,
    the current and draft contents in text areas, and a short provenance
    line. Every payload value is HTML-escaped before being embedded.

    Args:
        repository_id: Repository whose document is reviewed.
        document_name: Document to review, e.g. ``"INTENT.md"`` or ``"SCOPE.md"``.
        service: Registry service providing the review payload and repository.

    Returns:
        The rendered page produced by ``page``.
    """
    review = service.document_review(repository_id, document_name)
    repository = service.get_repository(repository_id)
    display_name = repository_display_name(repository)

    # Missing or ``None`` payload entries degrade to empty text / empty dicts.
    provenance = review.get("provenance") or {}
    candidate_counts = provenance.get("candidate_counts") or {}

    heading = escape(document_name)
    write_policy = escape(str(review.get("write_policy", "")))
    presence = "exists" if review.get("exists") else "missing"
    location = escape(str(review.get("path", "")))
    current_text = escape(str(review.get("current_content") or ""))
    draft_text = escape(str(review.get("draft_content") or ""))
    run_label = escape(str(provenance.get("analysis_run_id", "")))
    fact_total = escape(str(provenance.get("fact_count", 0)))
    capability_total = escape(str(candidate_counts.get("capabilities", 0)))

    body = f"""
<div class="actions">
<h1 style="margin-right:auto">{heading} Review</h1>
<a class="button secondary" href="/ui/repos/{repository_id}">Repository</a>
</div>
<section class="panel stack">
<p class="muted">{write_policy}</p>
<p><span class="pill">{presence}</span>
<span class="source">{location}</span></p>
<label>Current {heading}
<textarea rows="14" spellcheck="false">{current_text}</textarea>
</label>
<label>Draft {heading}
<textarea rows="18" spellcheck="false">{draft_text}</textarea>
</label>
<p class="muted">analysis run {run_label} ·
{fact_total} facts ·
{capability_total} candidate capabilities</p>
</section>
"""
    return page(
        f"{document_name} Review",
        body,
        selected_repository=display_name,
        selected_repository_id=repository.id,
    )
@router.get("/ui/discovery")
def discovery_page(service: RegistryService = Depends(get_service)) -> HTMLResponse:
repositories = service.list_repositories()
@@ -1514,6 +1569,8 @@ def repository_detail(
<a class="button secondary" href="/ui/repos/{repository_id}/dependency-graph">Dependency Graph</a>
<a class="button secondary" href="/ui/repos/{repository_id}/export">Export</a>
<a class="button secondary" href="/ui/repos/{repository_id}/scope">SCOPE</a>
<a class="button secondary" href="/ui/repos/{repository_id}/scope-review">Scope Draft</a>
<a class="button secondary" href="/ui/repos/{repository_id}/intent-review">Intent Draft</a>
<a class="button secondary" href="/ui">Back</a>
</div>
<p class="muted">{escape(repository.description or '')}</p>