Implement scope-derived candidate review infrastructure

This commit is contained in:
2026-05-16 00:26:29 +02:00
parent f4d782c997
commit ba2228e889
14 changed files with 1740 additions and 39 deletions

View File

@@ -73,8 +73,8 @@
"severity": "critical",
"applies_to": ["ability", "capability", "feature", "evidence"],
"description": "Generated SCOPE.md text cannot be primary evidence for rebuilding the same characteristic model. It may be comparison context, bootstrap context, or a generated output under review.",
"deterministic_action": "rejected",
"deterministic_action_when": "A candidate is supported only or primarily by generated SCOPE.md content from the same scoping process.",
"deterministic_action": "requires_review",
"deterministic_action_when": "A candidate is supported only or primarily by generated or derived SCOPE.md content from the same scoping process.",
"reviewer_guidance": "Use source, docs, tests, and product intent instead of accepting circular evidence.",
"agentic_guidance": "Treat circular generated-scope evidence as a blocker unless independent evidence supports the same claim.",
"examples": [
@@ -95,6 +95,36 @@
"examples": [
"Schema examples mentioning model providers should not create native model-provider capabilities."
]
},
{
"id": "RREG-QC-007",
"title": "Template Boilerplate Is Not Repository Purpose",
"category": "template-contamination",
"severity": "high",
"applies_to": ["ability", "capability"],
"description": "Repository templates, seed README text, and bootstrap boilerplate should not become the repository's native ability or capability when more specific source evidence exists.",
"deterministic_action": "downgraded",
"deterministic_action_when": "Candidate names or descriptions are dominated by template boilerplate such as repo-seed instead of repo-specific evidence.",
"reviewer_guidance": "Prefer SCOPE, INTENT, implementation, or product docs that describe this repository, not the template it was created from.",
"agentic_guidance": "Detect template text and replace it with a repo-specific abstraction before proposing approval.",
"examples": [
"A README that says 'A git repository template to bootstrap coulomb projects' should not become the ability for ops-warden."
]
},
{
"id": "RREG-QC-008",
"title": "Scope-Derived Drafts Stay Separate From Intent",
"category": "scope-intent-separation",
"severity": "medium",
"applies_to": ["scope", "intent", "ability", "capability"],
"description": "Existing SCOPE.md content can bootstrap current-state candidates and draft intent, but it must remain clearly labeled as scope-derived until reviewed.",
"deterministic_action": "requires_review",
"deterministic_action_when": "A candidate or draft is generated from SCOPE.md rather than authored INTENT.md or implementation evidence.",
"reviewer_guidance": "Check whether the claim describes current behavior, desired future utility, or both. Do not write INTENT.md without explicit review.",
"agentic_guidance": "Use SCOPE.md to propose current-state candidates and ambitious intent drafts, but keep provenance and review status explicit.",
"examples": [
"A Railiance SCOPE.md capability block can create a candidate capability, not approved registry truth."
]
}
]
}

View File

@@ -8,6 +8,7 @@ from repo_scoping.acceptance.criteria import (
load_quality_criteria,
)
from repo_scoping.core.models import (
CandidateAbility,
CandidateCapability,
CandidateFeature,
CandidateGraph,
@@ -39,11 +40,33 @@ def evaluate_candidate_graph_quality(
active_registry = registry or load_quality_criteria()
outcomes: list[QualityGateOutcome] = []
for ability in graph.abilities:
outcomes.extend(evaluate_candidate_ability_quality(ability, active_registry))
for capability in ability.capabilities:
outcomes.extend(evaluate_candidate_capability_quality(capability, active_registry))
return outcomes
def evaluate_candidate_ability_quality(
    ability: CandidateAbility,
    registry: QualityCriteriaRegistry | None = None,
) -> list[QualityGateOutcome]:
    """Apply the ability-level quality gates to a single candidate ability.

    Args:
        ability: Candidate ability whose name and description are inspected.
        registry: Criteria registry to evaluate against; when falsy the
            registry returned by ``load_quality_criteria()`` is used.

    Returns:
        A list holding one RREG-QC-007 outcome when the ability text looks
        like template boilerplate, otherwise an empty list.
    """
    active_registry = registry or load_quality_criteria()
    criteria = {criterion.id: criterion for criterion in active_registry.criteria}
    # Only one gate exists at ability level today: template contamination.
    if not _looks_template_contaminated(ability.name, ability.description):
        return []
    return [
        _outcome(
            active_registry,
            criteria["RREG-QC-007"],
            element_type="ability",
            element_id=ability.id,
            element_name=ability.name,
            reason="Candidate ability appears to be based on template boilerplate.",
        )
    ]
def evaluate_candidate_capability_quality(
capability: CandidateCapability,
registry: QualityCriteriaRegistry | None = None,
@@ -75,6 +98,17 @@ def evaluate_candidate_capability_quality(
reason="Candidate is supported only by generated SCOPE.md evidence.",
)
)
elif _has_scope_refs_or_attributes(refs, capability.attributes):
outcomes.append(
_outcome(
active_registry,
criteria["RREG-QC-008"],
element_type="capability",
element_id=capability.id,
element_name=capability.name,
reason="Candidate is scope-derived and must remain review-only until separated from intent.",
)
)
elif _all_weak_source_refs(refs):
outcomes.append(
_outcome(
@@ -97,6 +131,18 @@ def evaluate_candidate_capability_quality(
)
)
if _looks_template_contaminated(capability.name, capability.description):
outcomes.append(
_outcome(
active_registry,
criteria["RREG-QC-007"],
element_type="capability",
element_id=capability.id,
element_name=capability.name,
reason="Candidate capability appears to be based on template boilerplate.",
)
)
if _looks_like_provider_routing(capability):
outcomes.append(
_outcome(
@@ -197,6 +243,25 @@ def _all_generated_scope_refs(refs: list[SourceReference]) -> bool:
return bool(refs) and all(ref.path.endswith("SCOPE.md") for ref in refs)
def _has_scope_refs_or_attributes(
refs: list[SourceReference],
attributes: list[str],
) -> bool:
return any(ref.path.endswith("SCOPE.md") for ref in refs) or any(
attribute in {"scope-derived", "review-required-scope"}
for attribute in attributes
)
def _looks_template_contaminated(name: str, description: str) -> bool:
text = f"{name} {description}".lower()
return (
"repo-seed" in text
or "git repository template to bootstrap" in text
or "bootstrap coulomb projects" in text
)
def _all_weak_source_refs(refs: list[SourceReference]) -> bool:
return bool(refs) and all(_is_weak_source_ref(ref) for ref in refs)

View File

@@ -275,6 +275,8 @@ class CandidateGraphGenerator:
manifests = self._facts(facts, "manifest")
frameworks = self._facts(facts, "framework")
languages = self._facts(facts, "language")
configs = self._facts(facts, "config")
scope_facts = self._facts(facts, "scope")
llm_providers = self._facts(facts, "llm_provider")
credential_configs = self._facts(facts, "credential_config")
provider_registries = self._facts(facts, "provider_registry")
@@ -286,7 +288,7 @@ class CandidateGraphGenerator:
chunks,
)
ability_sources = docs or manifests or languages
ability_sources = docs or scope_facts or manifests or languages or configs
ability = CandidateAbilityDraft(
name=self._ability_name(repository, chunks),
description=self._ability_description(chunks),
@@ -308,6 +310,15 @@ class CandidateGraphGenerator:
capabilities.extend(
self._intent_capabilities(intent_facts, chunks, tests, examples, docs)
)
capabilities.extend(
self._scope_capabilities(
scope_facts,
chunks,
tests,
examples,
allow_summary_fallback=not intent_facts,
)
)
capabilities.extend(
self._repo_scoping_native_capabilities(
repository,
@@ -347,6 +358,18 @@ class CandidateGraphGenerator:
capabilities.append(
self._interface_capability(interfaces, tests, examples, docs, chunks)
)
if not capabilities:
capabilities.extend(
self._fact_derived_capabilities(
configs=configs,
manifests=manifests,
frameworks=frameworks,
languages=languages,
docs=docs,
tests=tests,
chunks=chunks,
)
)
return [
CandidateAbilityDraft(
@@ -582,6 +605,257 @@ class CandidateGraphGenerator:
words.pop()
return self._title_from_words(words[:10])
def _scope_capabilities(
    self,
    scope_facts: list[ObservedFact],
    chunks: list[ContentChunk],
    tests: list[ObservedFact],
    examples: list[ObservedFact],
    *,
    allow_summary_fallback: bool = True,
) -> list[CandidateCapabilityDraft]:
    """Build review-only capability drafts from SCOPE.md content.

    One draft is produced per titled ```capability block found in the scope
    chunks (de-duplicated by case-insensitive title). When no block yields a
    draft and ``allow_summary_fallback`` is true, a single draft named from
    the scope summary text is returned instead.

    Args:
        scope_facts: Facts used to build the source references attached to
            every draft, feature, and evidence entry.
        chunks: All content chunks; only scope chunks are considered.
        tests: Test facts; only their presence affects confidence.
        examples: Example facts; only their presence affects confidence.
        allow_summary_fallback: Whether to fall back to a summary-derived
            capability when no capability blocks are found.

    Returns:
        Capability drafts tagged ``scope-derived`` and
        ``review-required-scope``; empty when there are no scope chunks.
    """
    # A chunk counts as scope content by kind, by source role, or by filename.
    scope_chunks = [
        chunk
        for chunk in chunks
        if chunk.kind == "scope"
        or chunk.metadata.get("source_role") == "derived_scope"
        or chunk.path.lower().endswith("scope.md")
    ]
    if not scope_chunks:
        return []
    source_refs = self._source_refs(scope_facts)
    capabilities: list[CandidateCapabilityDraft] = []
    # Lower-cased titles already emitted, so repeated blocks are skipped.
    seen: set[str] = set()
    for block in self._scope_capability_blocks(scope_chunks):
        title = block.get("title", "").strip()
        if not title:
            continue
        key = title.lower()
        if key in seen:
            continue
        seen.add(key)
        # A missing or blank "type" falls back to the generic scope-derived class.
        capability_type = block.get("type", "scope-derived").strip() or "scope-derived"
        description = block.get("description", "").strip()
        keywords = self._scope_keywords(block.get("keywords", ""))
        # Provenance markers are always appended after the block's own tags.
        attributes = self._unique(
            [
                capability_type,
                *keywords,
                "scope-derived",
                "current-state",
                "review-required-scope",
            ]
        )
        # Each block also gets a single feature mirroring the capability title.
        feature = CandidateFeatureDraft(
            name=title,
            type=capability_type,
            location="SCOPE.md",
            confidence=0.55,
            source_refs=source_refs,
            primary_class=capability_type,
            attributes=self._unique(
                [capability_type, "scope-defined", "review-required-scope"]
            ),
        )
        capabilities.append(
            CandidateCapabilityDraft(
                name=title,
                description=(
                    "Reviewable current-state capability extracted from "
                    f"SCOPE.md: {description or title}"
                ),
                inputs=[],
                outputs=[title],
                # Base 0.45, with weighted flags for a description, keywords,
                # tests, and examples passed to self._confidence.
                confidence=self._confidence(
                    0.45,
                    [
                        (0.10, bool(description)),
                        (0.05, bool(keywords)),
                        (0.05, bool(tests)),
                        (0.05, bool(examples)),
                    ],
                ),
                source_refs=source_refs,
                primary_class=capability_type,
                attributes=attributes,
                features=[feature],
                evidence=[
                    CandidateEvidenceDraft(
                        type="scope-current-state",
                        reference="SCOPE.md",
                        strength="medium",
                        source_refs=source_refs,
                    )
                ],
            )
        )
    # Block-derived drafts win; the fallback is also skipped when disabled.
    if capabilities or not allow_summary_fallback:
        return capabilities
    fallback_name = self._scope_summary_capability_name(scope_chunks)
    if not fallback_name:
        return []
    # Summary fallback: fixed 0.45 confidence, weak evidence, no features.
    return [
        CandidateCapabilityDraft(
            name=fallback_name,
            description=(
                "Reviewable current-state capability inferred from SCOPE.md "
                "summary text. A curator should split this into more precise "
                "capabilities when reviewing."
            ),
            inputs=[],
            outputs=[fallback_name],
            confidence=0.45,
            source_refs=source_refs,
            primary_class="scope-derived",
            attributes=[
                "scope-derived",
                "current-state",
                "review-required-scope",
            ],
            evidence=[
                CandidateEvidenceDraft(
                    type="scope-current-state",
                    reference="SCOPE.md",
                    strength="weak",
                    source_refs=source_refs,
                )
            ],
        )
    ]
def _scope_capability_blocks(
self,
chunks: list[ContentChunk],
) -> list[dict[str, str]]:
blocks: list[dict[str, str]] = []
in_block = False
current: dict[str, str] = {}
current_key = ""
for chunk in sorted(chunks, key=lambda item: (item.path, item.start_line)):
for raw_line in chunk.text.splitlines():
line = raw_line.rstrip()
stripped = line.strip()
if stripped.startswith("```capability"):
in_block = True
current = {}
current_key = ""
continue
if in_block and stripped.startswith("```"):
if current:
blocks.append(current)
in_block = False
current = {}
current_key = ""
continue
if not in_block:
continue
key, separator, value = stripped.partition(":")
if separator and re.match(r"^[A-Za-z_][A-Za-z0-9_-]*$", key):
current_key = key.lower()
current[current_key] = value.strip().strip('"')
elif current_key and stripped:
current[current_key] = (
f"{current[current_key]} {stripped.strip()}"
).strip()
return blocks
def _scope_keywords(self, value: str) -> list[str]:
cleaned = value.strip()
if cleaned.startswith("[") and cleaned.endswith("]"):
cleaned = cleaned[1:-1]
return [
item.strip(" `\"'")
for item in cleaned.split(",")
if item.strip(" `\"'")
][:8]
def _scope_summary_capability_name(self, chunks: list[ContentChunk]) -> str:
one_liner = self._scope_one_liner(chunks)
if one_liner:
return self._imperative_purpose(one_liner)
return ""
def _fact_derived_capabilities(
self,
*,
configs: list[ObservedFact],
manifests: list[ObservedFact],
frameworks: list[ObservedFact],
languages: list[ObservedFact],
docs: list[ObservedFact],
tests: list[ObservedFact],
chunks: list[ContentChunk],
) -> list[CandidateCapabilityDraft]:
if not configs:
return []
capability_facts = configs + manifests + frameworks + languages
if not capability_facts:
return []
features: list[CandidateFeatureDraft] = []
for label, kind, facts in (
("Manage Repository Configuration", "configuration", configs),
("Declare Runtime And Package Manifests", "manifest", manifests),
("Use Detected Frameworks", "framework", frameworks),
("Provide Implementation In Detected Languages", "implementation", languages),
):
if not facts:
continue
features.append(
CandidateFeatureDraft(
name=label,
type=kind,
location=self._grouped_location(facts),
confidence=0.45,
source_refs=self._source_refs(facts),
primary_class=kind,
attributes=[kind, "fact-derived", "review-required"],
)
)
if not features:
return []
name = self._fact_derived_capability_name(chunks, features)
return [
CandidateCapabilityDraft(
name=name,
description=(
"Reviewable capability inferred from deterministic facts. "
"This fills the hierarchy when no stronger intent, scope "
"capability, or interface candidate exists."
),
inputs=self._feature_inputs(features),
outputs=self._feature_outputs(features),
confidence=self._confidence(
0.35,
[
(0.10, bool(configs)),
(0.10, bool(manifests)),
(0.05, bool(frameworks)),
(0.05, bool(tests)),
(0.05, bool(docs)),
],
),
source_refs=self._source_refs(capability_facts),
primary_class="fact-derived",
attributes=["fact-derived", "review-required", "partial-hierarchy"],
features=features,
evidence=self._evidence(tests, [], docs),
)
]
def _fact_derived_capability_name(
self,
chunks: list[ContentChunk],
features: list[CandidateFeatureDraft],
) -> str:
scope_name = self._scope_summary_capability_name(chunks)
if scope_name:
return scope_name
if any(feature.type == "configuration" for feature in features):
return "Manage Repository Configuration"
if any(feature.type == "manifest" for feature in features):
return "Declare Repository Runtime"
return "Describe Repository Implementation"
def _repo_scoping_native_capabilities(
self,
repository: Repository,
@@ -1219,40 +1493,110 @@ class CandidateGraphGenerator:
ops_name = self._operations_ability_name(chunks)
if ops_name:
return ops_name
purpose_text = self._document_purpose_sentence(chunks) or repository.description
purpose_text = (
self._intent_purpose_sentence(chunks)
or self._scope_one_liner(chunks)
or self._documentation_purpose_sentence(chunks)
or repository.description
)
if purpose_text:
normalized = self._imperative_purpose(purpose_text)
if normalized:
return normalized
return f"Support {self._humanize_identifier(repository.name)}"
def _document_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
for chunk in self._purpose_chunks(chunks):
def _intent_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
return self._purpose_sentence_for_chunks(
[
chunk
for chunk in self._purpose_chunks(chunks)
if chunk.kind == "intent"
or chunk.metadata.get("source_role") == "intent_summary"
or chunk.path.lower().endswith("intent.md")
]
)
def _documentation_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
return self._purpose_sentence_for_chunks(
[
chunk
for chunk in self._purpose_chunks(chunks)
if chunk.kind == "documentation"
and chunk.metadata.get("source_role") != "derived_scope"
and not chunk.path.lower().endswith("scope.md")
]
)
def _purpose_sentence_for_chunks(self, chunks: list[ContentChunk]) -> str:
for chunk in chunks:
if chunk.kind not in {"intent", "documentation"}:
continue
lines = [line.strip() for line in chunk.text.splitlines() if line.strip()]
paragraph = next((line for line in lines if not line.startswith("#")), "")
if paragraph:
if paragraph and not self._is_template_boilerplate(paragraph):
return paragraph
return ""
def _scope_one_liner(self, chunks: list[ContentChunk]) -> str:
for chunk in sorted(chunks, key=lambda item: (item.path, item.start_line)):
if not (
chunk.kind == "scope"
or chunk.metadata.get("source_role") == "derived_scope"
or chunk.path.lower().endswith("scope.md")
):
continue
lines = chunk.text.splitlines()
for index, raw_line in enumerate(lines):
if raw_line.strip().lower() == "## one-liner":
for following in lines[index + 1 :]:
candidate = following.strip()
if not candidate or candidate.startswith("---"):
continue
if candidate.startswith(">"):
continue
return candidate.strip(" .")
before_first_section: list[str] = []
for raw_line in lines:
candidate = raw_line.strip()
if candidate.startswith("## "):
break
before_first_section.append(candidate)
for candidate in before_first_section:
if (
candidate
and not candidate.startswith("#")
and not candidate.startswith(">")
and not candidate.startswith("---")
and not self._is_template_boilerplate(candidate)
):
return candidate.strip(" .")
return ""
def _is_template_boilerplate(self, text: str) -> bool:
lowered = text.lower()
return (
"git repository template to bootstrap" in lowered
or "this file helps you quickly understand" in lowered
or "intentionally lightweight and may be incomplete" in lowered
)
def _purpose_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]:
def priority(chunk: ContentChunk) -> tuple[int, str, int]:
role = chunk.metadata.get("source_role")
path = chunk.path.lower()
if role == "intent_summary" or path.endswith("intent.md"):
return (0, path, chunk.start_line)
if role == "product_documentation" or path.startswith("readme"):
return (1, path, chunk.start_line)
if role == "derived_scope" or path.endswith("scope.md"):
return (3, path, chunk.start_line)
return (2, path, chunk.start_line)
return (1, path, chunk.start_line)
if role == "product_documentation" or path.startswith("readme"):
return (2, path, chunk.start_line)
return (3, path, chunk.start_line)
return sorted(
[
chunk
for chunk in chunks
if chunk.kind in {"intent", "documentation"}
if chunk.kind in {"intent", "documentation", "scope"}
and chunk.metadata.get("source_role") != "agent_guidance"
],
key=priority,
@@ -1284,9 +1628,11 @@ class CandidateGraphGenerator:
if not words:
return ""
words[0] = self._imperative_verb(words[0])
return self._title_from_words(words[:8])
return self._title_from_words(words[:10])
def _imperative_verb(self, word: str) -> str:
if word.isupper():
return word
lower = word.lower().strip(",;:")
irregular = {
"does": "do",
@@ -1313,7 +1659,7 @@ class CandidateGraphGenerator:
for word in words
]
return " ".join(
word[:1].upper() + word[1:]
word if word.isupper() else word[:1].upper() + word[1:]
for word in cleaned_words
if word
)
@@ -1341,17 +1687,37 @@ class CandidateGraphGenerator:
lines = [line.strip() for line in chunk.text.splitlines() if line.strip()]
if not lines:
continue
if chunk.kind == "scope" or chunk.metadata.get("source_role") == "derived_scope":
one_liner = self._scope_one_liner([chunk])
if one_liner:
return f"SCOPE. {one_liner}"
heading = next((line.lstrip("#").strip() for line in lines if line.startswith("#")), "")
paragraph = next((line for line in lines if not line.startswith("#")), "")
if self._is_template_boilerplate(paragraph):
paragraph = ""
if heading and paragraph:
return f"{heading}. {paragraph}"
return heading or paragraph
return ""
def _documentation_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]:
def priority(chunk: ContentChunk) -> tuple[int, str, int]:
role = chunk.metadata.get("source_role")
path = chunk.path.lower()
if chunk.kind == "intent" or role == "intent_summary" or path.endswith("intent.md"):
return (0, path, chunk.start_line)
if chunk.kind == "scope" or role == "derived_scope" or path.endswith("scope.md"):
return (1, path, chunk.start_line)
return (2, path, chunk.start_line)
return sorted(
[chunk for chunk in chunks if chunk.kind in {"intent", "documentation"}],
key=lambda chunk: (0 if chunk.kind == "intent" else 1, chunk.path, chunk.start_line),
[
chunk
for chunk in chunks
if chunk.kind in {"intent", "documentation", "scope"}
and chunk.metadata.get("source_role") != "agent_guidance"
],
key=priority,
)
def _interface_summary(self, chunks: list[ContentChunk]) -> str:

View File

@@ -186,6 +186,19 @@ def build_parser() -> argparse.ArgumentParser:
default="markdown",
help="Inventory output format.",
)
dataset = subparsers.add_parser(
"assess-dataset",
help="Summarize repository generation coverage across the local dataset.",
)
dataset.add_argument("--database-path", help="Override REPO_SCOPING_DATABASE_PATH.")
dataset.add_argument("--checkout-root", help="Override REPO_SCOPING_CHECKOUT_ROOT.")
dataset.add_argument("--output", help="Write dataset assessment to this path instead of stdout.")
dataset.add_argument(
"--format",
choices=["json", "markdown"],
default="markdown",
help="Dataset assessment output format.",
)
return parser
@@ -204,6 +217,8 @@ def main(argv: Sequence[str] | None = None) -> int:
return list_quality_criteria_command(args)
if args.command == "list-legacy-auto-approvals":
return list_legacy_auto_approvals_command(args)
if args.command == "assess-dataset":
return assess_dataset_command(args)
parser.error(f"unknown command: {args.command}")
return 2
@@ -285,6 +300,235 @@ def list_legacy_auto_approvals_command(args: argparse.Namespace) -> int:
return 0
def assess_dataset_command(args: argparse.Namespace) -> int:
    """Handle the ``assess-dataset`` subcommand.

    Builds the dataset assessment, renders it as JSON or markdown according
    to ``args.format``, and writes it to ``args.output`` when given or to
    stdout otherwise.

    Returns:
        Always 0.
    """
    report = dataset_assessment(service_from_args(args))
    if args.format == "json":
        content = json.dumps(report, indent=2) + "\n"
    else:
        content = dataset_assessment_markdown(report)
    if args.output:
        write_text(args.output, content)
        return 0
    # Guarantee exactly one trailing newline on stdout.
    terminator = "" if content.endswith("\n") else "\n"
    print(content, end=terminator)
    return 0
def dataset_assessment(service: RegistryService) -> dict[str, object]:
    """Summarize generation coverage for every repository known to the service.

    For each repository this collects fact and chunk counts for one
    completed analysis run, candidate and approved hierarchy counts,
    dependency-graph size, document presence on disk, and derived issue
    codes, while accumulating dataset-wide totals.

    Args:
        service: Registry service used for all lookups.

    Returns:
        A dict with ``schema_version``, ``summary`` (totals), and
        ``repositories`` (one entry per repository).
    """
    repositories = []
    totals = {
        "repositories": 0,
        "facts": 0,
        "content_chunks": 0,
        "candidate_abilities": 0,
        "candidate_capabilities": 0,
        "candidate_features": 0,
        "candidate_evidence": 0,
        "approved_abilities": 0,
        "approved_capabilities": 0,
        "approved_features": 0,
        "approved_evidence": 0,
        "dependency_graph_nodes": 0,
        "dependency_graph_edges": 0,
    }
    for repository in service.list_repositories():
        runs = service.list_analysis_runs(repository.id)
        # Last completed run in list order.
        # NOTE(review): presumably list_analysis_runs returns oldest first,
        # making this the most recent completed run; confirm.
        latest_run = next((run for run in reversed(runs) if run.status == "completed"), None)
        facts = service.list_observed_facts(repository.id, latest_run.id) if latest_run else []
        chunks = service.list_content_chunks(repository.id, latest_run.id) if latest_run else []
        candidate_counts = {
            "abilities": 0,
            "capabilities": 0,
            "features": 0,
            "evidence": 0,
        }
        candidate_names: list[str] = []
        if latest_run is not None:
            # A missing candidate graph is reported as zero counts, not an error.
            try:
                graph = service.candidate_graph(repository.id, latest_run.id)
            except NotFoundError:
                graph = None
            if graph is not None:
                candidate_counts = candidate_graph_counts(graph)
                # Keep at most five ability names for the report.
                candidate_names = [
                    ability.name
                    for ability in graph.abilities
                ][:5]
        ability_map = service.ability_map(repository.id)
        approved_counts = approved_graph_counts(ability_map)
        graph_metrics = {"node_count": 0, "edge_count": 0}
        # Best effort: a missing or unbuildable dependency graph leaves zeros.
        try:
            dependency_graph = service.dependency_graph_elements(repository.id)
            graph_metrics = {
                "node_count": int(dependency_graph["metrics"]["node_count"]),
                "edge_count": int(dependency_graph["metrics"]["edge_count"]),
            }
        except (NotFoundError, ValueError):
            pass
        snapshot = (
            service.store.get_snapshot(latest_run.snapshot_id)
            if latest_run is not None and latest_run.snapshot_id is not None
            else None
        )
        # Without a snapshot the empty path makes every document read as absent.
        doc_presence = document_presence(snapshot.source_path if snapshot else "")
        issues = dataset_assessment_issues(
            fact_count=len(facts),
            chunk_count=len(chunks),
            candidate_counts=candidate_counts,
            approved_counts=approved_counts,
            graph_metrics=graph_metrics,
            doc_presence=doc_presence,
            candidate_names=candidate_names,
        )
        repositories.append(
            {
                "repository_id": repository.id,
                "name": repository.name,
                "status": repository.status,
                "latest_analysis_run_id": latest_run.id if latest_run else None,
                "latest_analysis_run_status": latest_run.status if latest_run else None,
                "facts": len(facts),
                "content_chunks": len(chunks),
                "candidate_counts": candidate_counts,
                "approved_counts": approved_counts,
                "dependency_graph": graph_metrics,
                "documents": doc_presence,
                "candidate_ability_names": candidate_names,
                "issues": issues,
            }
        )
        # Fold this repository into the dataset-wide totals.
        totals["repositories"] += 1
        totals["facts"] += len(facts)
        totals["content_chunks"] += len(chunks)
        totals["candidate_abilities"] += candidate_counts["abilities"]
        totals["candidate_capabilities"] += candidate_counts["capabilities"]
        totals["candidate_features"] += candidate_counts["features"]
        totals["candidate_evidence"] += candidate_counts["evidence"]
        totals["approved_abilities"] += approved_counts["abilities"]
        totals["approved_capabilities"] += approved_counts["capabilities"]
        totals["approved_features"] += approved_counts["features"]
        totals["approved_evidence"] += approved_counts["evidence"]
        totals["dependency_graph_nodes"] += graph_metrics["node_count"]
        totals["dependency_graph_edges"] += graph_metrics["edge_count"]
    return {
        "schema_version": "repo-scoping-dataset-assessment/v1",
        "summary": totals,
        "repositories": repositories,
    }
def candidate_graph_counts(graph) -> dict[str, int]:
    """Count abilities, capabilities, features, and evidence in a candidate graph."""
    capability_total = 0
    feature_total = 0
    evidence_total = 0
    for ability in graph.abilities:
        for capability in ability.capabilities:
            capability_total += 1
            feature_total += len(capability.features)
            evidence_total += len(capability.evidence)
    return {
        "abilities": len(graph.abilities),
        "capabilities": capability_total,
        "features": feature_total,
        "evidence": evidence_total,
    }
def approved_graph_counts(ability_map) -> dict[str, int]:
    """Count the approved hierarchy in an ability map.

    ``scope`` is 1 when the map has a truthy scope and 0 otherwise; the
    remaining keys count abilities and their nested capabilities, features,
    and evidence.
    """
    capability_total = 0
    feature_total = 0
    evidence_total = 0
    for ability in ability_map.abilities:
        for capability in ability.capabilities:
            capability_total += 1
            feature_total += len(capability.features)
            evidence_total += len(capability.evidence)
    scope_total = 1 if ability_map.scope else 0
    return {
        "scope": scope_total,
        "abilities": len(ability_map.abilities),
        "capabilities": capability_total,
        "features": feature_total,
        "evidence": evidence_total,
    }
def document_presence(source_path: str) -> dict[str, bool]:
    """Report which well-known documents exist as files under a checkout root.

    Args:
        source_path: Root directory of the repository snapshot. An empty
            string means no snapshot is available.

    Returns:
        A dict keyed by ``INTENT.md``, ``SCOPE.md``, ``README``,
        ``CLAUDE.md``, and ``AGENTS.md`` (in that order). ``README`` is true
        when any regular file whose name starts with ``README`` exists
        directly under the root. All values are False for an empty path.
    """
    document_keys = ("INTENT.md", "SCOPE.md", "README", "CLAUDE.md", "AGENTS.md")
    if not source_path:
        return dict.fromkeys(document_keys, False)
    root = Path(source_path)

    def has_document(key: str) -> bool:
        if key == "README":
            # Require a regular file so a directory such as "README_assets"
            # is not counted, matching the is_file() checks on the others.
            return any(match.is_file() for match in root.glob("README*"))
        return (root / key).is_file()

    return {key: has_document(key) for key in document_keys}
def dataset_assessment_issues(
    *,
    fact_count: int,
    chunk_count: int,
    candidate_counts: dict[str, int],
    approved_counts: dict[str, int],
    graph_metrics: dict[str, int],
    doc_presence: dict[str, bool],
    candidate_names: list[str],
) -> list[str]:
    """Derive issue codes for one repository from its assessment counts.

    Returns:
        Issue codes in a fixed order; empty when nothing is flagged.
    """
    # Each check is a thunk so lookups happen only when the original
    # short-circuit order would have reached them.
    checks = (
        (
            "facts-without-candidate-capabilities",
            lambda: fact_count and not candidate_counts["capabilities"],
        ),
        (
            "scope-text-unused-for-lower-hierarchy",
            lambda: chunk_count
            and doc_presence.get("SCOPE.md")
            and not candidate_counts["capabilities"],
        ),
        (
            "facts-with-empty-dependency-graph",
            lambda: fact_count and not graph_metrics["node_count"],
        ),
        (
            "approved-hierarchy-missing-and-no-graph-fallback",
            lambda: approved_counts["abilities"] == 0
            and graph_metrics["node_count"] == 0,
        ),
        (
            "template-readme-contamination",
            lambda: any("repo-seed" in name.lower() for name in candidate_names),
        ),
    )
    return [code for code, is_triggered in checks if is_triggered()]
def dataset_assessment_markdown(report: dict[str, object]) -> str:
    """Render a dataset assessment report as a markdown summary and table.

    Args:
        report: Dict with a ``summary`` of totals and a ``repositories`` list.

    Returns:
        Markdown text ending with a newline: a title, summary bullets, and
        one table row per repository.
    """

    def slashed(counts) -> str:
        # abilities/capabilities/features/evidence, as shown in table cells.
        return (
            f"{counts['abilities']}/{counts['capabilities']}/"
            f"{counts['features']}/{counts['evidence']}"
        )

    summary = report["summary"]
    candidate_line = (
        f"- Candidate hierarchy: {summary['candidate_abilities']} abilities / "
        f"{summary['candidate_capabilities']} capabilities / "
        f"{summary['candidate_features']} features / "
        f"{summary['candidate_evidence']} evidence"
    )
    approved_line = (
        f"- Approved hierarchy: {summary['approved_abilities']} abilities / "
        f"{summary['approved_capabilities']} capabilities / "
        f"{summary['approved_features']} features / "
        f"{summary['approved_evidence']} evidence"
    )
    graph_line = (
        f"- Dependency graph: {summary['dependency_graph_nodes']} nodes / "
        f"{summary['dependency_graph_edges']} edges"
    )
    lines = [
        "# Repo-Scoping Dataset Assessment",
        "",
        f"- Repositories: {summary['repositories']}",
        f"- Facts: {summary['facts']}",
        candidate_line,
        approved_line,
        graph_line,
        "",
        "| Repo | Run | Facts | Chunks | Candidate | Approved | Graph | Issues |",
        "| --- | ---: | ---: | ---: | --- | --- | --- | --- |",
    ]
    for item in report["repositories"]:
        graph = item["dependency_graph"]
        cells = [
            f"{item['name']}",
            f"{item['latest_analysis_run_id'] or '-'}",
            f"{item['facts']}",
            f"{item['content_chunks']}",
            slashed(item["candidate_counts"]),
            slashed(item["approved_counts"]),
            f"{graph['node_count']}/{graph['edge_count']}",
            f"{', '.join(item['issues']) or '-'}",
        ]
        lines.append("| " + " | ".join(cells) + " |")
    return "\n".join(lines) + "\n"
def legacy_auto_approval_records_markdown(records) -> str:
if not records:
return "No legacy trusted auto-approval records found.\n"

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
from collections.abc import Sequence
from dataclasses import asdict, replace
from pathlib import Path
from typing import Any
from repo_scoping.acceptance import (
@@ -1492,6 +1493,22 @@ class RegistryService:
ability_map = self.store.get_ability_map(repository_id)
facts_by_id = {fact.id: fact for fact in self.store.list_observed_facts(repository_id)}
characteristic_index = self._dependency_characteristic_index(ability_map)
latest_candidate_graph = None
include_candidates = not ability_map.abilities
if include_candidates:
latest_run = self._latest_completed_run(repository_id)
latest_candidate_graph = (
self._candidate_graph_or_none(repository_id, latest_run.id)
if latest_run is not None
else None
)
if latest_candidate_graph is not None:
characteristic_index.update(
self._candidate_dependency_characteristic_index(
latest_candidate_graph,
ability_map,
)
)
nodes: dict[str, dict[str, object]] = {}
edge_sources: dict[str, DependencyEdge] = {}
@@ -1513,6 +1530,16 @@ class RegistryService:
if (display_edge := self._dependency_display_edge(edge, facts_by_id))
is not None
]
if latest_candidate_graph is not None:
graph_edges.extend(
display_edge
for edge in self._candidate_dependency_edges(
latest_candidate_graph,
ability_map,
)
if (display_edge := self._dependency_display_edge(edge, facts_by_id))
is not None
)
def ensure_node(kind: str, key: str, item_id: int | None) -> None:
if key in nodes:
@@ -1555,7 +1582,7 @@ class RegistryService:
"layer": self._dependency_layer(kind),
"label": detail.get("label")
or self._dependency_node_label(repository_id, kind, key, item_id),
"reviewState": "accepted",
"reviewState": detail.get("reviewState", "accepted"),
"name": detail.get("name")
or self._dependency_node_label(repository_id, kind, key, item_id),
"description": detail.get("description", ""),
@@ -1563,7 +1590,7 @@ class RegistryService:
"attributes": detail.get("attributes", []),
"confidence": detail.get("confidence"),
"visualSize": self._dependency_node_size(detail.get("confidence")),
"ownership": self._ownership_for_kind(kind),
"ownership": detail.get("ownership", self._ownership_for_kind(kind)),
"freshnessState": (
impact_item.freshness_state
if impact_item is not None
@@ -1587,6 +1614,7 @@ class RegistryService:
class_name
for class_name in (
kind,
str(detail.get("reviewState", "accepted")),
"stale" if impact_item is not None else "current",
"changed" if is_changed_fact else "",
)
@@ -2627,6 +2655,336 @@ class RegistryService:
)
return self.store.get_ability_map(repository_id)
def document_review(
    self,
    repository_id: int,
    document_name: str,
) -> dict[str, object]:
    """Build a review-only payload comparing a document on disk with a draft.

    The latest completed analysis run (if any) supplies the facts, content
    chunks, candidate graph, and snapshot source path. Nothing is written to
    disk by this method.

    Args:
        repository_id: Identifier handed to the store lookups.
        document_name: ``INTENT.md`` or ``SCOPE.md``, matched
            case-insensitively.

    Returns:
        A dict with the repository (as a plain dict), the canonical document
        filename, the resolved path ("" when no snapshot is available),
        whether the file exists, its current content, the generated draft,
        the draft kind, the write policy text, and provenance counts.

    Raises:
        ValueError: If ``document_name`` is not one of the two supported
            documents. Errors raised by the store lookups propagate unchanged.
    """
    normalized = document_name.upper()
    if normalized not in {"INTENT.MD", "SCOPE.MD"}:
        raise ValueError("document_name must be INTENT.md or SCOPE.md")
    repository = self.store.get_repository(repository_id)
    latest_run = self._latest_completed_run(repository_id)
    facts = (
        self.store.list_observed_facts(repository_id, latest_run.id)
        if latest_run is not None
        else []
    )
    chunks = (
        self.store.list_content_chunks(repository_id, latest_run.id)
        if latest_run is not None
        else []
    )
    ability_map = self.store.get_ability_map(repository_id)
    candidate_graph = (
        self._candidate_graph_or_none(repository_id, latest_run.id)
        if latest_run is not None
        else None
    )
    snapshot = (
        self.store.get_snapshot(latest_run.snapshot_id)
        if latest_run is not None and latest_run.snapshot_id is not None
        else None
    )
    source_root = Path(snapshot.source_path) if snapshot is not None else None
    filename = "INTENT.md" if normalized == "INTENT.MD" else "SCOPE.md"
    current_path = source_root / filename if source_root is not None else None
    # Existence is tracked separately from content so that an existing but
    # empty document is not reported as missing.
    exists = current_path is not None and current_path.is_file()
    current_content = ""
    if exists:
        current_content = current_path.read_text(encoding="utf-8", errors="ignore")
    draft_content = (
        self._draft_intent_document(repository, ability_map, candidate_graph, facts, chunks)
        if normalized == "INTENT.MD"
        else self._draft_scope_document(repository, ability_map, candidate_graph, facts, chunks)
    )
    return {
        "repository": asdict(repository),
        "document": filename,
        "path": str(current_path) if current_path is not None else "",
        "exists": exists,
        "current_content": current_content,
        "draft_content": draft_content,
        "draft_kind": "ambitious-intent" if normalized == "INTENT.MD" else "current-scope",
        "write_policy": (
            "review-only; repo-scoping does not write INTENT.md automatically"
            if normalized == "INTENT.MD"
            else "review-only from this endpoint; use the explicit scope write endpoint to write"
        ),
        "provenance": self._document_review_provenance(
            latest_run.id if latest_run is not None else None,
            facts,
            chunks,
            candidate_graph,
        ),
    }
def _latest_completed_run(self, repository_id: int) -> AnalysisRun | None:
completed = [
run
for run in self.store.list_analysis_runs(repository_id)
if run.status == "completed"
]
return completed[-1] if completed else None
def _candidate_graph_or_none(
self,
repository_id: int,
analysis_run_id: int,
) -> CandidateGraph | None:
try:
return self.store.get_candidate_graph(repository_id, analysis_run_id)
except NotFoundError:
return None
def _draft_intent_document(
self,
repository: Repository,
ability_map: RepositoryAbilityMap,
candidate_graph: CandidateGraph | None,
facts: list[ObservedFact],
chunks: list[ContentChunk],
) -> str:
one_liner = (
self._scope_one_liner_from_chunks(chunks)
or ability_map.scope.description
or repository.description
or f"{repository.name} should provide clearly reviewable repository utility."
)
capabilities = self._draft_capability_names(ability_map, candidate_graph)
boundaries = self._scope_section_items(chunks, "Not Relevant When")
related = self._scope_section_items(chunks, "Related / Overlapping")
lines = [
"# INTENT",
"",
"> Draft generated by repo-scoping for review.",
"> This is ambitious design intent derived from current scope, facts, and candidates.",
"> It is not written automatically.",
"",
"## Purpose",
"",
self._ambitious_intent_sentence(one_liner),
"",
"## Intended Capabilities",
"",
]
if capabilities:
lines.extend(f"- {name}" for name in capabilities)
else:
lines.append("- <!-- needs curator input -->")
lines.extend(["", "## Success Criteria", ""])
lines.extend(
[
"- The repository's useful behavior can be explained from source-linked evidence.",
"- Candidate capabilities can be reviewed without relying on template boilerplate.",
"- Scope and intent remain separate: current behavior informs but does not define ambition.",
]
)
lines.extend(["", "## Boundaries", ""])
if boundaries:
lines.extend(boundaries)
else:
lines.append("- <!-- needs curator input -->")
if related:
lines.extend(["", "## Related Repositories", ""])
lines.extend(related)
return "\n".join(lines).rstrip() + "\n"
def _draft_scope_document(
self,
repository: Repository,
ability_map: RepositoryAbilityMap,
candidate_graph: CandidateGraph | None,
facts: list[ObservedFact],
chunks: list[ContentChunk],
) -> str:
one_liner = (
self._scope_one_liner_from_chunks(chunks)
or ability_map.scope.description
or repository.description
or f"{repository.name} has observed repository behavior under review."
)
capabilities = self._draft_capability_names(ability_map, candidate_graph)
relevant = self._scope_section_items(chunks, "Relevant When")
not_relevant = self._scope_section_items(chunks, "Not Relevant When")
paths = sorted({fact.path for fact in facts if fact.path})[:8]
lines = [
"# SCOPE",
"",
"> Draft generated by repo-scoping for review.",
"> This describes current understood behavior and should be edited before writing.",
"",
"---",
"",
"## One-liner",
"",
one_liner,
"",
"## Core Idea",
"",
self._scope_core_idea_from_chunks(chunks) or one_liner,
"",
"## Relevant When",
"",
]
lines.extend(relevant or ["- <!-- needs curator input -->"])
lines.extend(["", "## Not Relevant When", ""])
lines.extend(not_relevant or ["- <!-- needs curator input -->"])
lines.extend(["", "## Current State", ""])
lines.extend(
[
f"- Repository status: {repository.status}",
f"- Facts observed: {len(facts)}",
f"- Candidate capabilities: {len(capabilities)}",
]
)
lines.extend(["", "## Getting Oriented", ""])
if paths:
lines.extend(
[
f"- Start with: {paths[0]}",
f"- Key files / directories: {', '.join(paths)}",
]
)
else:
lines.append("- <!-- needs curator input -->")
lines.extend(["", "## Provided Capabilities", ""])
if capabilities:
for name in capabilities:
lines.extend(
[
"```capability",
"type: draft",
f"title: {name}",
"description: Review this candidate capability before treating it as scope truth.",
"keywords: [draft, review-required]",
"```",
"",
]
)
else:
lines.append("<!-- needs curator input -->")
return "\n".join(lines).rstrip() + "\n"
def _document_review_provenance(
self,
analysis_run_id: int | None,
facts: list[ObservedFact],
chunks: list[ContentChunk],
candidate_graph: CandidateGraph | None,
) -> dict[str, object]:
return {
"analysis_run_id": analysis_run_id,
"fact_count": len(facts),
"content_chunk_count": len(chunks),
"candidate_counts": (
{
"abilities": len(candidate_graph.abilities),
"capabilities": sum(
len(ability.capabilities) for ability in candidate_graph.abilities
),
"features": sum(
len(capability.features)
for ability in candidate_graph.abilities
for capability in ability.capabilities
),
"evidence": sum(
len(capability.evidence)
for ability in candidate_graph.abilities
for capability in ability.capabilities
),
}
if candidate_graph is not None
else {
"abilities": 0,
"capabilities": 0,
"features": 0,
"evidence": 0,
}
),
"source_paths": sorted({fact.path for fact in facts if fact.path})[:12],
}
def _draft_capability_names(
self,
ability_map: RepositoryAbilityMap,
candidate_graph: CandidateGraph | None,
) -> list[str]:
approved = [
capability.name
for ability in ability_map.abilities
for capability in ability.capabilities
]
if approved:
return approved[:12]
if candidate_graph is None:
return []
names = [
capability.name
for ability in candidate_graph.abilities
for capability in ability.capabilities
if capability.status == "candidate"
]
if names:
return names[:12]
return [ability.name for ability in candidate_graph.abilities[:3]]
def _scope_one_liner_from_chunks(self, chunks: list[ContentChunk]) -> str:
for chunk in self._scope_chunks(chunks):
lines = chunk.text.splitlines()
for index, raw_line in enumerate(lines):
if raw_line.strip().lower() == "## one-liner":
for following in lines[index + 1 :]:
candidate = following.strip()
if candidate and not candidate.startswith(("---", ">")):
return candidate.strip(" .")
return ""
def _scope_core_idea_from_chunks(self, chunks: list[ContentChunk]) -> str:
items = self._scope_section_items(chunks, "Core Idea")
return "\n".join(items) if items else ""
def _scope_section_items(
self,
chunks: list[ContentChunk],
section_name: str,
) -> list[str]:
wanted = section_name.lower()
items: list[str] = []
in_section = False
for chunk in self._scope_chunks(chunks):
for raw_line in chunk.text.splitlines():
line = raw_line.strip()
if line.startswith("## "):
in_section = line.lstrip("#").strip().lower() == wanted
continue
if not in_section or not line or line.startswith("---"):
continue
if line.startswith("```"):
continue
if line.startswith("- "):
items.append(line)
elif not line.startswith("#"):
items.append(line)
return items[:10]
def _scope_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]:
return sorted(
[
chunk
for chunk in chunks
if chunk.kind == "scope"
or chunk.metadata.get("source_role") == "derived_scope"
or chunk.path.lower().endswith("scope.md")
],
key=lambda chunk: (chunk.path, chunk.start_line),
)
def _ambitious_intent_sentence(self, current_scope: str) -> str:
cleaned = current_scope.strip().rstrip(".")
if not cleaned:
return "Provide a source-linked, reviewable repository capability."
return f"Provide a deliberate, reviewable implementation of: {cleaned}."
def ability_map(self, repository_id: int) -> RepositoryAbilityMap:
    """Return the store's ability map for the given repository."""
    stored_map = self.store.get_ability_map(repository_id)
    return stored_map
@@ -2965,6 +3323,74 @@ class RegistryService:
"sourceReferences": [
asdict(source_ref) for source_ref in evidence.source_refs
],
}
return index
def _candidate_dependency_characteristic_index(
self,
candidate_graph: CandidateGraph,
ability_map: RepositoryAbilityMap,
) -> dict[str, dict[str, object]]:
index: dict[str, dict[str, object]] = {
self._candidate_dependency_key("scope", ability_map.scope.id): {
"name": ability_map.scope.name,
"description": ability_map.scope.description,
"primaryClass": "draft-scope",
"attributes": ["draft", "scope", "candidate-derived"],
"confidence": ability_map.scope.confidence,
"reviewState": "draft",
"ownership": "curator_owned",
"sourceReferences": [],
}
}
for ability in candidate_graph.abilities:
index[self._candidate_dependency_key("ability", ability.id)] = {
"name": ability.name,
"description": ability.description,
"primaryClass": ability.primary_class,
"attributes": ability.attributes,
"confidence": ability.confidence,
"reviewState": ability.status,
"ownership": "mixed",
"sourceReferences": [asdict(ref) for ref in ability.source_refs],
}
for capability in ability.capabilities:
index[self._candidate_dependency_key("capability", capability.id)] = {
"name": capability.name,
"description": capability.description,
"primaryClass": capability.primary_class,
"attributes": capability.attributes,
"confidence": capability.confidence,
"reviewState": capability.status,
"ownership": "mixed",
"sourceReferences": [asdict(ref) for ref in capability.source_refs],
}
for feature in capability.features:
index[self._candidate_dependency_key("feature", feature.id)] = {
"name": feature.name,
"description": feature.location,
"primaryClass": feature.primary_class or feature.type,
"attributes": feature.attributes,
"confidence": feature.confidence,
"path": feature.location,
"reviewState": feature.status,
"ownership": "mixed",
"sourceReferences": [
asdict(source_ref) for source_ref in feature.source_refs
],
}
for evidence in capability.evidence:
index[self._candidate_dependency_key("evidence", evidence.id)] = {
"name": evidence.reference,
"description": evidence.type,
"primaryClass": evidence.type,
"attributes": [evidence.type, evidence.strength],
"confidence": self._evidence_confidence(evidence.strength),
"reviewState": evidence.status,
"ownership": "mixed",
"sourceReferences": [
asdict(source_ref) for source_ref in evidence.source_refs
],
}
return index
@@ -3223,6 +3649,134 @@ class RegistryService:
)
return edges
def _candidate_dependency_edges(
self,
candidate_graph: CandidateGraph,
ability_map: RepositoryAbilityMap,
) -> list[DependencyEdge]:
edges: list[DependencyEdge] = []
scope_key = self._candidate_dependency_key("scope", ability_map.scope.id)
for ability in candidate_graph.abilities:
ability_key = self._candidate_dependency_key("ability", ability.id)
edges.append(
self._dependency_edge(
source_kind="ability",
source_id=ability.id,
source_key=ability_key,
target_kind="scope",
target_id=ability_map.scope.id,
target_key=scope_key,
dependency_type="draft-summarizes",
strength="medium",
source="candidate_graph",
)
)
for source_ref in ability.source_refs:
edges.append(
self._dependency_edge(
source_kind="fact",
source_id=source_ref.fact_id,
source_key=self._source_ref_fact_key(source_ref),
target_kind="ability",
target_id=ability.id,
target_key=ability_key,
dependency_type="observes-draft",
strength="medium",
source="candidate_source_ref",
)
)
for capability in ability.capabilities:
capability_key = self._candidate_dependency_key(
"capability",
capability.id,
)
edges.append(
self._dependency_edge(
source_kind="capability",
source_id=capability.id,
source_key=capability_key,
target_kind="ability",
target_id=ability.id,
target_key=ability_key,
dependency_type="draft-realizes",
strength="medium",
source="candidate_graph",
)
)
for source_ref in capability.source_refs:
edges.append(
self._dependency_edge(
source_kind="fact",
source_id=source_ref.fact_id,
source_key=self._source_ref_fact_key(source_ref),
target_kind="capability",
target_id=capability.id,
target_key=capability_key,
dependency_type="observes-draft",
strength="medium",
source="candidate_source_ref",
)
)
for feature in capability.features:
feature_key = self._candidate_dependency_key("feature", feature.id)
edges.append(
self._dependency_edge(
source_kind="feature",
source_id=feature.id,
source_key=feature_key,
target_kind="capability",
target_id=capability.id,
target_key=capability_key,
dependency_type="draft-supports",
strength="medium",
source="candidate_graph",
)
)
for source_ref in feature.source_refs:
edges.append(
self._dependency_edge(
source_kind="fact",
source_id=source_ref.fact_id,
source_key=self._source_ref_fact_key(source_ref),
target_kind="feature",
target_id=feature.id,
target_key=feature_key,
dependency_type="observes-draft",
strength="medium",
source="candidate_source_ref",
)
)
for evidence in capability.evidence:
evidence_key = self._candidate_dependency_key("evidence", evidence.id)
edges.append(
self._dependency_edge(
source_kind="evidence",
source_id=evidence.id,
source_key=evidence_key,
target_kind="capability",
target_id=capability.id,
target_key=capability_key,
dependency_type="draft-supports",
strength=evidence.strength or "medium",
source="candidate_graph",
)
)
for source_ref in evidence.source_refs:
edges.append(
self._dependency_edge(
source_kind="fact",
source_id=source_ref.fact_id,
source_key=self._source_ref_fact_key(source_ref),
target_kind="evidence",
target_id=evidence.id,
target_key=evidence_key,
dependency_type="observes-draft",
strength=evidence.strength or "medium",
source="candidate_source_ref",
)
)
return edges
def _dependency_edge(
self,
*,
@@ -3253,6 +3807,9 @@ class RegistryService:
def _dependency_key(self, kind: str, item_id: int) -> str:
return f"{kind}:{item_id}"
def _candidate_dependency_key(self, kind: str, item_id: int) -> str:
return f"candidate:{kind}:{item_id}" if kind != "scope" else f"draft:scope:{item_id}"
def _source_ref_fact_key(self, source_ref) -> str:
return f"fact:{source_ref.kind}:{source_ref.path}:{source_ref.name}"

View File

@@ -1212,6 +1212,38 @@ def get_ability_map(
raise HTTPException(status_code=404, detail=str(exc)) from exc
@app.get(
    "/repos/{repository_id}/intent/review",
    tags=["scope"],
)
def review_repository_intent(
    repository_id: int,
    service: RegistryService = Depends(get_service),
) -> dict[str, object]:
    # Review-only INTENT.md payload; service errors are mapped to HTTP
    # statuses (NotFoundError -> 404, ValueError -> 400).
    try:
        review = service.document_review(repository_id, "INTENT.md")
    except NotFoundError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return review
@app.get(
    "/repos/{repository_id}/scope/review",
    tags=["scope"],
)
def review_repository_scope(
    repository_id: int,
    service: RegistryService = Depends(get_service),
) -> dict[str, object]:
    # Review-only SCOPE.md payload; service errors are mapped to HTTP
    # statuses (NotFoundError -> 404, ValueError -> 400).
    try:
        review = service.document_review(repository_id, "SCOPE.md")
    except NotFoundError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return review
@app.get(
"/repos/{repository_id}/dependency-graph",
tags=["visualization"],

View File

@@ -1104,6 +1104,61 @@ def repository_scope_document(
)
@router.get("/ui/repos/{repository_id}/intent-review")
def repository_intent_review(
    repository_id: int,
    service: RegistryService = Depends(get_service),
) -> HTMLResponse:
    # Thin route: delegates to the shared review page for INTENT.md.
    return repository_document_review_page(
        repository_id=repository_id,
        document_name="INTENT.md",
        service=service,
    )
@router.get("/ui/repos/{repository_id}/scope-review")
def repository_scope_review(
    repository_id: int,
    service: RegistryService = Depends(get_service),
) -> HTMLResponse:
    # Thin route: delegates to the shared review page for SCOPE.md.
    return repository_document_review_page(
        repository_id=repository_id,
        document_name="SCOPE.md",
        service=service,
    )
def repository_document_review_page(
    repository_id: int,
    document_name: str,
    service: RegistryService,
) -> HTMLResponse:
    """Render the side-by-side current/draft review page for one document.

    Args:
        repository_id: Repository whose document is reviewed.
        document_name: Document name forwarded to ``service.document_review``.
        service: Registry service that supplies the review payload and the
            repository record.

    Returns:
        The HTML page produced by ``page`` with the current content, the
        generated draft, the write policy, and a short provenance line.
    """
    payload = service.document_review(repository_id, document_name)
    repository = service.get_repository(repository_id)
    display_name = repository_display_name(repository)
    current = str(payload.get("current_content") or "")
    draft = str(payload.get("draft_content") or "")
    provenance = payload.get("provenance") or {}
    # The payload always carries the analysis_run_id key, set to None when no
    # run has completed, so a .get() default is never used. Map None
    # explicitly instead of rendering the literal text "None".
    run_id = provenance.get("analysis_run_id")
    run_label = "n/a" if run_id is None else str(run_id)
    candidate_counts = provenance.get("candidate_counts") or {}
    body = f"""
    <div class="actions">
    <h1 style="margin-right:auto">{escape(document_name)} Review</h1>
    <a class="button secondary" href="/ui/repos/{repository_id}">Repository</a>
    </div>
    <section class="panel stack">
    <p class="muted">{escape(str(payload.get("write_policy", "")))}</p>
    <p><span class="pill">{'exists' if payload.get("exists") else 'missing'}</span>
    <span class="source">{escape(str(payload.get("path", "")))}</span></p>
    <label>Current {escape(document_name)}
    <textarea rows="14" spellcheck="false">{escape(current)}</textarea>
    </label>
    <label>Draft {escape(document_name)}
    <textarea rows="18" spellcheck="false">{escape(draft)}</textarea>
    </label>
    <p class="muted">analysis run {escape(run_label)} ·
    {escape(str(provenance.get("fact_count", 0)))} facts ·
    {escape(str(candidate_counts.get("capabilities", 0)))} candidate capabilities</p>
    </section>
    """
    return page(
        f"{document_name} Review",
        body,
        selected_repository=display_name,
        selected_repository_id=repository.id,
    )
@router.get("/ui/discovery")
def discovery_page(service: RegistryService = Depends(get_service)) -> HTMLResponse:
repositories = service.list_repositories()
@@ -1514,6 +1569,8 @@ def repository_detail(
<a class="button secondary" href="/ui/repos/{repository_id}/dependency-graph">Dependency Graph</a>
<a class="button secondary" href="/ui/repos/{repository_id}/export">Export</a>
<a class="button secondary" href="/ui/repos/{repository_id}/scope">SCOPE</a>
<a class="button secondary" href="/ui/repos/{repository_id}/scope-review">Scope Draft</a>
<a class="button secondary" href="/ui/repos/{repository_id}/intent-review">Intent Draft</a>
<a class="button secondary" href="/ui">Back</a>
</div>
<p class="muted">{escape(repository.description or '')}</p>

View File

@@ -182,6 +182,115 @@ def test_candidate_generator_prefers_intent_over_derived_scope_for_ability_name(
assert graph[0].name == "Provide A Provider-agnostic LLM Connector"
def test_candidate_generator_uses_scope_one_liner_over_template_readme():
    """The SCOPE one-liner names the ability even when the README is template text."""
    ops_warden = Repository(
        id=1,
        name="ops-warden",
        url="/tmp/ops-warden",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "scope", "SCOPE", "SCOPE.md", metadata={"source_role": "derived_scope"}),
    ]
    template_readme = chunk(
        1,
        "documentation",
        "README.md",
        "# repo-seed\nA git repository template to bootstrap coulomb projects from.",
        end_line=2,
    )
    derived_scope = chunk(
        2,
        "scope",
        "SCOPE.md",
        "# SCOPE\n\n## One-liner\n"
        "SSH Certificate Authority and credential issuance for the ops fleet.\n",
        end_line=4,
    )
    derived_scope.metadata["source_role"] = "derived_scope"

    abilities = CandidateGraphGenerator().generate(
        ops_warden,
        observed,
        [template_readme, derived_scope],
    )

    top_ability = abilities[0]
    assert top_ability.name == "SSH Certificate Authority And Credential Issuance For The Ops Fleet"
    assert "repo-seed" not in top_ability.description
def test_candidate_generator_extracts_current_capabilities_from_scope_blocks():
    """A fenced capability block in SCOPE.md becomes a scope-derived candidate."""
    railiance_apps = Repository(
        id=1,
        name="railiance-apps",
        url="/tmp/railiance-apps",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "scope", "SCOPE", "SCOPE.md", metadata={"source_role": "derived_scope"}),
    ]
    scope_chunk = chunk(
        1,
        "scope",
        "SCOPE.md",
        "# SCOPE\n\n## One-liner\n"
        "S5 Workloads and Experience layer of the Railiance OAS Stack.\n\n"
        "## Provided Capabilities\n\n"
        "```capability\n"
        "type: infrastructure\n"
        "title: Application workload deployment\n"
        "description: Deploy and manage user-facing applications as Helm releases.\n"
        "keywords: [gitea, helm, application]\n"
        "```\n",
        end_line=12,
    )
    scope_chunk.metadata["source_role"] = "derived_scope"

    abilities = CandidateGraphGenerator().generate(railiance_apps, observed, [scope_chunk])

    top_ability = abilities[0]
    assert top_ability.name == "S5 Workloads And Experience Layer Of The Railiance OAS Stack"

    extracted = top_ability.capabilities[0]
    assert extracted.name == "Application workload deployment"
    assert extracted.primary_class == "infrastructure"
    expected_attributes = {"scope-derived", "current-state", "review-required-scope"}
    assert expected_attributes <= set(extracted.attributes)

    first_feature = extracted.features[0]
    assert first_feature.name == "Application workload deployment"
    assert first_feature.location == "SCOPE.md"
    assert extracted.evidence[0].reference == "SCOPE.md"
def test_candidate_generator_adds_fact_derived_capability_when_no_stronger_layers():
    """Config and manifest facts alone still yield a fact-derived capability."""
    empty_layer = Repository(
        id=1,
        name="railiance-empty-layer",
        url="/tmp/railiance-empty-layer",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "config", "sops config", ".sops.yaml"),
        fact(2, "manifest", "pyproject.toml", "pyproject.toml"),
    ]

    abilities = CandidateGraphGenerator().generate(empty_layer, observed)

    derived = abilities[0].capabilities[0]
    assert derived.name == "Manage Repository Configuration"
    assert derived.primary_class == "fact-derived"
    feature_types = {feature.type for feature in derived.features}
    assert feature_types == {"configuration", "manifest"}
def test_candidate_generator_enriches_descriptions_from_content_chunks():
repository = Repository(
id=1,

View File

@@ -216,6 +216,43 @@ def test_list_legacy_auto_approvals_cli_writes_json_inventory(tmp_path):
assert records[0]["current_approved_ability_count"] == 1
def test_assess_dataset_cli_reports_sparse_hierarchy_issues(tmp_path):
    """assess-dataset reports candidates and a non-empty graph for a scope-only repo."""
    service = make_service(tmp_path)
    scope_only_root = tmp_path / "scope-only"
    scope_only_root.mkdir()
    scope_file = scope_only_root / "SCOPE.md"
    scope_file.write_text(
        "# SCOPE\n\n## One-liner\nScope-only current behavior.\n",
        encoding="utf-8",
    )
    registered = service.register_repository(name="Scope Only", url=str(scope_only_root))
    service.analyze_repository(registered.id, use_llm_assistance=False)

    report_file = tmp_path / "dataset.json"
    cli_arguments = [
        "assess-dataset",
        "--format",
        "json",
        "--output",
        str(report_file),
        "--database-path",
        str(tmp_path / "registry.sqlite3"),
        "--checkout-root",
        str(tmp_path / "checkouts"),
    ]
    exit_code = main(cli_arguments)

    report = json.loads(report_file.read_text(encoding="utf-8"))
    repo_report = report["repositories"][0]
    assert exit_code == 0
    assert report["schema_version"] == "repo-scoping-dataset-assessment/v1"
    assert repo_report["name"] == "Scope Only"
    assert repo_report["documents"]["SCOPE.md"] is True
    assert repo_report["candidate_counts"]["capabilities"] >= 1
    assert repo_report["dependency_graph"]["node_count"] > 0
    assert "facts-with-empty-dependency-graph" not in repo_report["issues"]
def test_self_assess_cli_exports_challenger_and_comparison(tmp_path):
source = write_repo(tmp_path)
golden_path = tmp_path / "golden.json"

View File

@@ -18,6 +18,8 @@ def test_quality_criteria_registry_is_versioned_and_reviewable():
"RREG-QC-004",
"RREG-QC-005",
"RREG-QC-006",
"RREG-QC-007",
"RREG-QC-008",
}
for criterion in registry.criteria:
assert criterion.description

View File

@@ -84,7 +84,67 @@ def test_quality_gates_flag_circular_scope_evidence():
outcomes = evaluate_candidate_capability_quality(capability)
assert outcomes[0].criterion_id == "RREG-QC-005"
assert outcomes[0].outcome == "rejected"
assert outcomes[0].outcome == "requires_review"
def test_quality_gates_flag_scope_derived_candidates_for_review():
    """A scope-derived capability triggers RREG-QC-005 and only review outcomes."""
    scope_derived = CandidateCapability(
        id=12,
        name="Application workload deployment",
        description="Extracted from SCOPE.md.",
        inputs=[],
        outputs=[],
        confidence=0.6,
        status="candidate",
        source_refs=[source_ref("SCOPE.md", "scope")],
        confidence_label="medium",
        primary_class="infrastructure",
        attributes=["scope-derived", "review-required-scope"],
    )

    outcomes = evaluate_candidate_capability_quality(scope_derived)

    triggered_ids = {outcome.criterion_id for outcome in outcomes}
    assert "RREG-QC-005" in triggered_ids
    for outcome in outcomes:
        assert outcome.outcome == "requires_review"
def test_quality_gates_flag_template_contaminated_abilities():
    """An ability named after repo-seed boilerplate is downgraded by RREG-QC-007."""
    ops_warden = Repository(
        id=1,
        name="Ops Warden",
        url=".",
        description=None,
        branch="main",
        status="analyzed",
    )
    completed_run = AnalysisRun(
        id=1,
        repository_id=1,
        snapshot_id=None,
        status="completed",
        started_at="2026-05-15T00:00:00Z",
        completed_at="2026-05-15T00:00:01Z",
        error_message=None,
        scanner_version="deterministic-v1",
    )
    template_ability = CandidateAbility(
        id=1,
        name="A Git Repository Template To Bootstrap Coulomb Projects",
        description="Derived from repo-seed README boilerplate.",
        confidence=0.7,
        status="candidate",
        source_refs=[source_ref("README.md", "documentation")],
    )
    contaminated_graph = CandidateGraph(
        repository=ops_warden,
        analysis_run=completed_run,
        abilities=[template_ability],
    )

    outcomes = evaluate_candidate_graph_quality(contaminated_graph)

    first_outcome = outcomes[0]
    assert first_outcome.criterion_id == "RREG-QC-007"
    assert first_outcome.outcome == "downgraded"
def test_quality_gate_outcomes_are_serializable_for_assessment_artifacts():

View File

@@ -498,6 +498,49 @@ def test_dependency_graph_deduplicates_document_fact_nodes(tmp_path):
assert fact_nodes[0]["label"] == "README.md (documentation)"
def test_dependency_graph_renders_candidate_fallback_when_approved_hierarchy_missing(tmp_path):
    """Without an approved hierarchy the graph falls back to candidate and draft nodes."""
    service = make_service(tmp_path)
    scope_root = tmp_path / "scope-candidate"
    scope_root.mkdir()
    scope_text = (
        "# SCOPE\n\n"
        "## One-liner\n"
        "S5 Workloads and Experience layer.\n\n"
        "## Provided Capabilities\n\n"
        "```capability\n"
        "type: infrastructure\n"
        "title: Application workload deployment\n"
        "description: Deploy applications as Helm releases.\n"
        "keywords: [helm]\n"
        "```\n"
    )
    (scope_root / "SCOPE.md").write_text(scope_text, encoding="utf-8")
    registered = service.register_repository(name="Scope Candidate", url=str(scope_root))
    service.analyze_repository(
        registered.id,
        source_path=str(scope_root),
        use_llm_assistance=False,
    )

    payload = service.dependency_graph_elements(registered.id, use_latest_profile=False)

    # Elements carrying a "source" key are edges; the rest are nodes.
    node_data = []
    edge_data = []
    for element in payload["elements"]:
        data = element["data"]
        if "source" in data:
            edge_data.append(data)
        else:
            node_data.append(data)

    review_states = [node["reviewState"] for node in node_data]
    dependency_types = [edge["dependencyType"] for edge in edge_data]
    assert payload["metrics"]["node_count"] > 0
    assert "candidate" in review_states
    assert "draft" in review_states
    assert "draft-realizes" in dependency_types
    assert "draft-supports" in dependency_types
def test_manual_registry_updates_and_deletes_approved_entries(tmp_path):
service = make_service(tmp_path)
repository = service.register_repository(

View File

@@ -466,6 +466,12 @@ def test_openapi_contract_snapshot_for_stable_agent_paths():
"/repos/{repository_id}/export": {
"get": {"tags": ["discovery"], "success_schema": "application/x-yaml"}
},
"/repos/{repository_id}/intent/review": {
"get": {"tags": ["scope"], "success_schema": "object"}
},
"/repos/{repository_id}/scope/review": {
"get": {"tags": ["scope"], "success_schema": "object"}
},
"/repos/{repo_slug}/scope": {
"get": {"tags": ["scope"], "success_schema": None}
},
@@ -837,6 +843,62 @@ def test_api_generates_diffs_and_writes_scope_md(tmp_path):
app.dependency_overrides.clear()
def test_api_reviews_intent_and_scope_drafts_without_writing_intent(tmp_path):
    """Both review endpoints return drafts, and INTENT.md is never written to disk."""
    draft_root = tmp_path / "draft-repo"
    draft_root.mkdir()
    scope_text = (
        "# SCOPE\n\n"
        "## One-liner\n"
        "S5 Workloads and Experience layer.\n\n"
        "## Provided Capabilities\n\n"
        "```capability\n"
        "type: infrastructure\n"
        "title: Application workload deployment\n"
        "description: Deploy applications as Helm releases.\n"
        "keywords: [helm]\n"
        "```\n"
    )
    (draft_root / "SCOPE.md").write_text(scope_text, encoding="utf-8")

    def override_settings():
        return Settings(
            database_path=str(tmp_path / "draft-api.sqlite3"),
            checkout_root=str(tmp_path / "checkouts"),
        )

    app.dependency_overrides[get_settings] = override_settings
    client = TestClient(app)
    try:
        created = client.post(
            "/repos",
            json={"name": "Draft Repo", "url": str(draft_root)},
        ).json()
        repo_id = created["id"]
        analysis = client.post(
            f"/repos/{repo_id}/analysis-runs",
            json={"source_path": str(draft_root), "use_llm_assistance": False},
        ).json()
        assert analysis["analysis_run"]["status"] == "completed"

        intent_response = client.get(f"/repos/{repo_id}/intent/review")
        assert intent_response.status_code == 200
        intent_body = intent_response.json()
        assert intent_body["document"] == "INTENT.md"
        assert intent_body["exists"] is False
        assert "Application workload deployment" in intent_body["draft_content"]
        assert "does not write INTENT.md automatically" in intent_body["write_policy"]
        assert not (draft_root / "INTENT.md").exists()

        scope_response = client.get(f"/repos/{repo_id}/scope/review")
        assert scope_response.status_code == 200
        scope_body = scope_response.json()
        assert scope_body["exists"] is True
        assert "S5 Workloads and Experience layer" in scope_body["current_content"]
        assert "Application workload deployment" in scope_body["draft_content"]
        assert scope_body["provenance"]["candidate_counts"]["capabilities"] >= 1
    finally:
        # Always restore the app so later tests see the real settings.
        app.dependency_overrides.clear()
def test_api_compare_gap_and_export_use_cases(tmp_path):
def override_settings():
return Settings(
@@ -1550,6 +1612,14 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
f'<a class="button secondary" href="/ui/repos/{repository_id}/dependency-graph">Dependency Graph</a>'
in detail_response.text
)
assert (
f'<a class="button secondary" href="/ui/repos/{repository_id}/scope-review">Scope Draft</a>'
in detail_response.text
)
assert (
f'<a class="button secondary" href="/ui/repos/{repository_id}/intent-review">Intent Draft</a>'
in detail_response.text
)
repo_scope_response = client.get(f"/ui/repos/{repository_id}/scope")
assert repo_scope_response.status_code == 200
@@ -1600,7 +1670,9 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
assert "Content Chunks" in run_detail.text
assert "README.md:1-2" in run_detail.text
assert "ID " in run_detail.text
assert "No review decisions yet." in run_detail.text
assert "quality_gate_evaluation" in run_detail.text
assert "requires_review:" in run_detail.text
assert "without approving registry truth" in run_detail.text
assert "Expectation Gaps" in run_detail.text
assert "Record Gap" in run_detail.text
@@ -1674,7 +1746,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
assert "Discovery" in approved_detail.text
assert "Export" in approved_detail.text
assert "Elements" in approved_detail.text
assert "q=Report+Service+Status" in approved_detail.text
assert "q=UI+Repo+Owns+The+Status+Reporting+Scope" in approved_detail.text
graph_response = client.get(f"/repos/{repository_id}/dependency-graph")
assert graph_response.status_code == 200
@@ -1787,7 +1859,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
f"/ui/repos/{repository_id}/elements?scope=facts&amp;analysis_run_id={first_run_id}&amp;type=facts"
in approved_detail.text
)
assert "Report Service Status Through API And CLI Entry" in approved_detail.text
assert "UI Repo Owns The Status Reporting Scope" in approved_detail.text
assert "Language: Python" in approved_detail.text
assert "Framework: FastAPI" in approved_detail.text
assert "interface:app.py:3" in approved_detail.text
@@ -1801,7 +1873,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
assert "Registry Capabilities" in approved_listing.text
assert "Entry" in approved_listing.text
assert "Approved only" in approved_listing.text
assert "Expose Repository Interface" in approved_listing.text
assert "UI Repo Owns The Status Reporting Scope" in approved_listing.text
assert "Save" in approved_listing.text
assert "Delete" in approved_listing.text
@@ -1964,14 +2036,14 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
filtered_search_response = client.get(
"/ui/search",
params={
"q": "repository",
"status": "indexed",
"language": "Python",
"ability": "Report Service Status",
"capability": "Repository",
},
)
params={
"q": "repository",
"status": "indexed",
"language": "Python",
"ability": "UI Repo",
"capability": "Scope",
},
)
assert filtered_search_response.status_code == 200
assert "UI Repo" in filtered_search_response.text

View File

@@ -31,7 +31,7 @@ abilities, and draft scope from facts, source-linked text, and existing
## Dataset Assessment
The current `var/repo-scoping.sqlite3` dataset contains eight repositories. The
The initial `var/repo-scoping.sqlite3` dataset contained eight repositories. The
new non-repo-scoping repositories all completed analysis, but only
`ops-warden` produced a candidate capability and feature. Railiance repos mostly
produced one candidate ability, zero candidate capabilities, zero candidate
@@ -59,7 +59,7 @@ Observed patterns:
```task
id: RREG-WP-0018-T01
status: todo
status: done
priority: high
state_hub_task_id: "dd00a642-7c69-4ae2-b7ac-954c31a1c72a"
```
@@ -80,7 +80,7 @@ Acceptance criteria:
```task
id: RREG-WP-0018-T02
status: todo
status: done
priority: high
state_hub_task_id: "01eb03da-7a0e-4e22-ae2d-7596752d178e"
```
@@ -106,7 +106,7 @@ Acceptance criteria:
```task
id: RREG-WP-0018-T03
status: todo
status: done
priority: high
state_hub_task_id: "fd572f4d-d2f6-4c85-bbf5-f77829fd6e6a"
```
@@ -129,7 +129,7 @@ Acceptance criteria:
```task
id: RREG-WP-0018-T04
status: todo
status: done
priority: high
state_hub_task_id: "286d96e0-ec5a-4a55-bb50-62d20ab25830"
```
@@ -152,7 +152,7 @@ Acceptance criteria:
```task
id: RREG-WP-0018-T05
status: todo
status: done
priority: high
state_hub_task_id: "80bc671c-2361-47e5-8135-7c945de66437"
```
@@ -175,7 +175,7 @@ Acceptance criteria:
```task
id: RREG-WP-0018-T06
status: todo
status: done
priority: medium
state_hub_task_id: "4b74a058-b759-42d2-a243-7134dd907093"
```
@@ -197,7 +197,7 @@ Acceptance criteria:
```task
id: RREG-WP-0018-T07
status: todo
status: in_progress
priority: medium
state_hub_task_id: "cd1a3c14-076b-42da-8319-48310a964611"
```
@@ -213,3 +213,30 @@ Acceptance criteria:
- Dependency graph element counts are non-zero for repositories with facts.
- The comparison report makes it easy to judge whether the new result is better
than the previous sparse output.
## Implementation Update
Implemented the comparison and generation infrastructure needed to rerun the
dataset:
- Added `repo-scoping assess-dataset` to summarize latest runs by facts,
chunks, candidate/approved hierarchy counts, graph coverage, document
presence, and sparse-hierarchy quality issues.
- Updated candidate generation so `SCOPE.md` one-liners and `Provided
Capabilities` blocks seed reviewable current-state abilities/capabilities,
while deterministic fact fallback now requires stronger configuration facts
and does not promote dependency-only repositories.
- Added review-only `INTENT.md`/`SCOPE.md` API and UI draft views. When
  `INTENT.md` is missing, an ambitious draft is now derived from
  scope/candidates without writing the file.
- Added dependency graph fallback nodes/edges for candidate and draft
hierarchies so repos with facts no longer render empty just because approved
characteristics are absent.
- Added transparent quality criteria for template contamination and
  scope-vs-intent separation; deterministic gates can require review but never
  approve registry truth on their own.

The latest local `repo-scoping assess-dataset` run now sees nine repositories
because `vantage-point` has been added. It still reports the old sparse
Railiance candidate counts because those stored analysis runs predate this
implementation. T07 stays `in_progress` until the affected repositories are
rerun and compared against the sparse baseline.