diff --git a/docs/quality-criteria/acceptance-quality-criteria.v1.json b/docs/quality-criteria/acceptance-quality-criteria.v1.json index 8fa3383..fb6f98b 100644 --- a/docs/quality-criteria/acceptance-quality-criteria.v1.json +++ b/docs/quality-criteria/acceptance-quality-criteria.v1.json @@ -73,8 +73,8 @@ "severity": "critical", "applies_to": ["ability", "capability", "feature", "evidence"], "description": "Generated SCOPE.md text cannot be primary evidence for rebuilding the same characteristic model. It may be comparison context, bootstrap context, or a generated output under review.", - "deterministic_action": "rejected", - "deterministic_action_when": "A candidate is supported only or primarily by generated SCOPE.md content from the same scoping process.", + "deterministic_action": "requires_review", + "deterministic_action_when": "A candidate is supported only or primarily by generated or derived SCOPE.md content from the same scoping process.", "reviewer_guidance": "Use source, docs, tests, and product intent instead of accepting circular evidence.", "agentic_guidance": "Treat circular generated-scope evidence as a blocker unless independent evidence supports the same claim.", "examples": [ @@ -95,6 +95,36 @@ "examples": [ "Schema examples mentioning model providers should not create native model-provider capabilities." ] + }, + { + "id": "RREG-QC-007", + "title": "Template Boilerplate Is Not Repository Purpose", + "category": "template-contamination", + "severity": "high", + "applies_to": ["ability", "capability"], + "description": "Repository templates, seed README text, and bootstrap boilerplate should not become the repository's native ability or capability when more specific source evidence exists.", + "deterministic_action": "downgraded", + "deterministic_action_when": "Candidate names or descriptions are dominated by template boilerplate such as repo-seed instead of repo-specific evidence.", + "reviewer_guidance": "Prefer SCOPE, INTENT, implementation, or product docs that describe this repository, not the template it was created from.", + "agentic_guidance": "Detect template text and replace it with a repo-specific abstraction before proposing approval.", + "examples": [ + "A README that says 'A git repository template to bootstrap coulomb projects' should not become the ability for ops-warden." + ] + }, + { + "id": "RREG-QC-008", + "title": "Scope-Derived Drafts Stay Separate From Intent", + "category": "scope-intent-separation", + "severity": "medium", + "applies_to": ["scope", "intent", "ability", "capability"], + "description": "Existing SCOPE.md content can bootstrap current-state candidates and draft intent, but it must remain clearly labeled as scope-derived until reviewed.", + "deterministic_action": "requires_review", + "deterministic_action_when": "A candidate or draft is generated from SCOPE.md rather than authored INTENT.md or implementation evidence.", + "reviewer_guidance": "Check whether the claim describes current behavior, desired future utility, or both. Do not write INTENT.md without explicit review.", + "agentic_guidance": "Use SCOPE.md to propose current-state candidates and ambitious intent drafts, but keep provenance and review status explicit.", + "examples": [ + "A Railiance SCOPE.md capability block can create a candidate capability, not approved registry truth." + ] } ] } diff --git a/src/repo_scoping/acceptance/gates.py b/src/repo_scoping/acceptance/gates.py index 33d885e..3275135 100644 --- a/src/repo_scoping/acceptance/gates.py +++ b/src/repo_scoping/acceptance/gates.py @@ -8,6 +8,7 @@ from repo_scoping.acceptance.criteria import ( load_quality_criteria, ) from repo_scoping.core.models import ( + CandidateAbility, CandidateCapability, CandidateFeature, CandidateGraph, @@ -39,11 +40,33 @@ def evaluate_candidate_graph_quality( active_registry = registry or load_quality_criteria() outcomes: list[QualityGateOutcome] = [] for ability in graph.abilities: + outcomes.extend(evaluate_candidate_ability_quality(ability, active_registry)) for capability in ability.capabilities: outcomes.extend(evaluate_candidate_capability_quality(capability, active_registry)) return outcomes +def evaluate_candidate_ability_quality( + ability: CandidateAbility, + registry: QualityCriteriaRegistry | None = None, +) -> list[QualityGateOutcome]: + active_registry = registry or load_quality_criteria() + criteria = {criterion.id: criterion for criterion in active_registry.criteria} + outcomes: list[QualityGateOutcome] = [] + if _looks_template_contaminated(ability.name, ability.description): + outcomes.append( + _outcome( + active_registry, + criteria["RREG-QC-007"], + element_type="ability", + element_id=ability.id, + element_name=ability.name, + reason="Candidate ability appears to be based on template boilerplate.", + ) + ) + return outcomes + + def evaluate_candidate_capability_quality( capability: CandidateCapability, registry: QualityCriteriaRegistry | None = None, @@ -75,6 +98,17 @@ def evaluate_candidate_capability_quality( reason="Candidate is supported only by generated SCOPE.md evidence.", ) ) + elif _has_scope_refs_or_attributes(refs, capability.attributes): + outcomes.append( + _outcome( + active_registry, + criteria["RREG-QC-008"], + element_type="capability", + element_id=capability.id, + element_name=capability.name, + reason="Candidate is scope-derived and must remain review-only until separated from intent.", + ) + ) elif _all_weak_source_refs(refs): outcomes.append( _outcome( @@ -97,6 +131,18 @@ def evaluate_candidate_capability_quality( ) ) + if _looks_template_contaminated(capability.name, capability.description): + outcomes.append( + _outcome( + active_registry, + criteria["RREG-QC-007"], + element_type="capability", + element_id=capability.id, + element_name=capability.name, + reason="Candidate capability appears to be based on template boilerplate.", + ) + ) + if _looks_like_provider_routing(capability): outcomes.append( _outcome( @@ -197,6 +243,25 @@ def _all_generated_scope_refs(refs: list[SourceReference]) -> bool: return bool(refs) and all(ref.path.endswith("SCOPE.md") for ref in refs) +def _has_scope_refs_or_attributes( + refs: list[SourceReference], + attributes: list[str], +) -> bool: + return any(ref.path.endswith("SCOPE.md") for ref in refs) or any( + attribute in {"scope-derived", "review-required-scope"} + for attribute in attributes + ) + + +def _looks_template_contaminated(name: str, description: str) -> bool: + text = f"{name} {description}".lower() + return ( + "repo-seed" in text + or "git repository template to bootstrap" in text + or "bootstrap coulomb projects" in text + ) + + def _all_weak_source_refs(refs: list[SourceReference]) -> bool: return bool(refs) and all(_is_weak_source_ref(ref) for ref in refs) diff --git a/src/repo_scoping/candidate_graph/generator.py b/src/repo_scoping/candidate_graph/generator.py index a077ed6..686d86a 100644 --- a/src/repo_scoping/candidate_graph/generator.py +++ b/src/repo_scoping/candidate_graph/generator.py @@ -275,6 +275,8 @@ class CandidateGraphGenerator: manifests = self._facts(facts, "manifest") frameworks = self._facts(facts, "framework") languages = self._facts(facts, "language") + configs = self._facts(facts, "config") + scope_facts = self._facts(facts, "scope") llm_providers = self._facts(facts, "llm_provider") credential_configs = self._facts(facts, "credential_config") provider_registries = self._facts(facts, "provider_registry") @@ -286,7 +288,7 @@ class CandidateGraphGenerator: chunks, ) - ability_sources = docs or manifests or languages + ability_sources = docs or scope_facts or manifests or languages or configs ability = CandidateAbilityDraft( name=self._ability_name(repository, chunks), description=self._ability_description(chunks), @@ -308,6 +310,15 @@ class CandidateGraphGenerator: capabilities.extend( self._intent_capabilities(intent_facts, chunks, tests, examples, docs) ) + capabilities.extend( + self._scope_capabilities( + scope_facts, + chunks, + tests, + examples, + allow_summary_fallback=not intent_facts, + ) + ) capabilities.extend( self._repo_scoping_native_capabilities( repository, @@ -347,6 +358,18 @@ class CandidateGraphGenerator: capabilities.append( self._interface_capability(interfaces, tests, examples, docs, chunks) ) + if not capabilities: + capabilities.extend( + self._fact_derived_capabilities( + configs=configs, + manifests=manifests, + frameworks=frameworks, + languages=languages, + docs=docs, + tests=tests, + chunks=chunks, + ) + ) return [ CandidateAbilityDraft( @@ -582,6 +605,257 @@ class CandidateGraphGenerator: words.pop() return self._title_from_words(words[:10]) + def _scope_capabilities( + self, + scope_facts: list[ObservedFact], + chunks: list[ContentChunk], + tests: list[ObservedFact], + examples: list[ObservedFact], + *, + allow_summary_fallback: bool = True, + ) -> list[CandidateCapabilityDraft]: + scope_chunks = [ + chunk + for chunk in chunks + if chunk.kind == "scope" + or chunk.metadata.get("source_role") == "derived_scope" + or chunk.path.lower().endswith("scope.md") + ] + if not scope_chunks: + return [] + source_refs = self._source_refs(scope_facts) + capabilities: list[CandidateCapabilityDraft] = [] + seen: set[str] = set() + for block in self._scope_capability_blocks(scope_chunks): + title = block.get("title", "").strip() + if not title: + continue + key = title.lower() + if key in seen: + continue + seen.add(key) + capability_type = block.get("type", "scope-derived").strip() or "scope-derived" + description = block.get("description", "").strip() + keywords = self._scope_keywords(block.get("keywords", "")) + attributes = self._unique( + [ + capability_type, + *keywords, + "scope-derived", + "current-state", + "review-required-scope", + ] + ) + feature = CandidateFeatureDraft( + name=title, + type=capability_type, + location="SCOPE.md", + confidence=0.55, + source_refs=source_refs, + primary_class=capability_type, + attributes=self._unique( + [capability_type, "scope-defined", "review-required-scope"] + ), + ) + capabilities.append( + CandidateCapabilityDraft( + name=title, + description=( + "Reviewable current-state capability extracted from " + f"SCOPE.md: {description or title}" + ), + inputs=[], + outputs=[title], + confidence=self._confidence( + 0.45, + [ + (0.10, bool(description)), + (0.05, bool(keywords)), + (0.05, bool(tests)), + (0.05, bool(examples)), + ], + ), + source_refs=source_refs, + primary_class=capability_type, + attributes=attributes, + features=[feature], + evidence=[ + CandidateEvidenceDraft( + type="scope-current-state", + reference="SCOPE.md", + strength="medium", + source_refs=source_refs, + ) + ], + ) + ) + if capabilities or not allow_summary_fallback: + return capabilities + fallback_name = self._scope_summary_capability_name(scope_chunks) + if not fallback_name: + return [] + return [ + CandidateCapabilityDraft( + name=fallback_name, + description=( + "Reviewable current-state capability inferred from SCOPE.md " + "summary text. A curator should split this into more precise " + "capabilities when reviewing." + ), + inputs=[], + outputs=[fallback_name], + confidence=0.45, + source_refs=source_refs, + primary_class="scope-derived", + attributes=[ + "scope-derived", + "current-state", + "review-required-scope", + ], + evidence=[ + CandidateEvidenceDraft( + type="scope-current-state", + reference="SCOPE.md", + strength="weak", + source_refs=source_refs, + ) + ], + ) + ] + + def _scope_capability_blocks( + self, + chunks: list[ContentChunk], + ) -> list[dict[str, str]]: + blocks: list[dict[str, str]] = [] + in_block = False + current: dict[str, str] = {} + current_key = "" + for chunk in sorted(chunks, key=lambda item: (item.path, item.start_line)): + for raw_line in chunk.text.splitlines(): + line = raw_line.rstrip() + stripped = line.strip() + if stripped.startswith("```capability"): + in_block = True + current = {} + current_key = "" + continue + if in_block and stripped.startswith("```"): + if current: + blocks.append(current) + in_block = False + current = {} + current_key = "" + continue + if not in_block: + continue + key, separator, value = stripped.partition(":") + if separator and re.match(r"^[A-Za-z_][A-Za-z0-9_-]*$", key): + current_key = key.lower() + current[current_key] = value.strip().strip('"') + elif current_key and stripped: + current[current_key] = ( + f"{current[current_key]} {stripped.strip()}" + ).strip() + return blocks + + def _scope_keywords(self, value: str) -> list[str]: + cleaned = value.strip() + if cleaned.startswith("[") and cleaned.endswith("]"): + cleaned = cleaned[1:-1] + return [ + item.strip(" `\"'") + for item in cleaned.split(",") + if item.strip(" `\"'") + ][:8] + + def _scope_summary_capability_name(self, chunks: list[ContentChunk]) -> str: + one_liner = self._scope_one_liner(chunks) + if one_liner: + return self._imperative_purpose(one_liner) + return "" + + def _fact_derived_capabilities( + self, + *, + configs: list[ObservedFact], + manifests: list[ObservedFact], + frameworks: list[ObservedFact], + languages: list[ObservedFact], + docs: list[ObservedFact], + tests: list[ObservedFact], + chunks: list[ContentChunk], + ) -> list[CandidateCapabilityDraft]: + if not configs: + return [] + capability_facts = configs + manifests + frameworks + languages + if not capability_facts: + return [] + features: list[CandidateFeatureDraft] = [] + for label, kind, facts in ( + ("Manage Repository Configuration", "configuration", configs), + ("Declare Runtime And Package Manifests", "manifest", manifests), + ("Use Detected Frameworks", "framework", frameworks), + ("Provide Implementation In Detected Languages", "implementation", languages), + ): + if not facts: + continue + features.append( + CandidateFeatureDraft( + name=label, + type=kind, + location=self._grouped_location(facts), + confidence=0.45, + source_refs=self._source_refs(facts), + primary_class=kind, + attributes=[kind, "fact-derived", "review-required"], + ) + ) + if not features: + return [] + name = self._fact_derived_capability_name(chunks, features) + return [ + CandidateCapabilityDraft( + name=name, + description=( + "Reviewable capability inferred from deterministic facts. " + "This fills the hierarchy when no stronger intent, scope " + "capability, or interface candidate exists." + ), + inputs=self._feature_inputs(features), + outputs=self._feature_outputs(features), + confidence=self._confidence( + 0.35, + [ + (0.10, bool(configs)), + (0.10, bool(manifests)), + (0.05, bool(frameworks)), + (0.05, bool(tests)), + (0.05, bool(docs)), + ], + ), + source_refs=self._source_refs(capability_facts), + primary_class="fact-derived", + attributes=["fact-derived", "review-required", "partial-hierarchy"], + features=features, + evidence=self._evidence(tests, [], docs), + ) + ] + + def _fact_derived_capability_name( + self, + chunks: list[ContentChunk], + features: list[CandidateFeatureDraft], + ) -> str: + scope_name = self._scope_summary_capability_name(chunks) + if scope_name: + return scope_name + if any(feature.type == "configuration" for feature in features): + return "Manage Repository Configuration" + if any(feature.type == "manifest" for feature in features): + return "Declare Repository Runtime" + return "Describe Repository Implementation" + def _repo_scoping_native_capabilities( self, repository: Repository, @@ -1219,40 +1493,110 @@ class CandidateGraphGenerator: ops_name = self._operations_ability_name(chunks) if ops_name: return ops_name - purpose_text = self._document_purpose_sentence(chunks) or repository.description + purpose_text = ( + self._intent_purpose_sentence(chunks) + or self._scope_one_liner(chunks) + or self._documentation_purpose_sentence(chunks) + or repository.description + ) if purpose_text: normalized = self._imperative_purpose(purpose_text) if normalized: return normalized return f"Support {self._humanize_identifier(repository.name)}" - def _document_purpose_sentence(self, chunks: list[ContentChunk]) -> str: - for chunk in self._purpose_chunks(chunks): + def _intent_purpose_sentence(self, chunks: list[ContentChunk]) -> str: + return self._purpose_sentence_for_chunks( + [ + chunk + for chunk in self._purpose_chunks(chunks) + if chunk.kind == "intent" + or chunk.metadata.get("source_role") == "intent_summary" + or chunk.path.lower().endswith("intent.md") + ] + ) + + def _documentation_purpose_sentence(self, chunks: list[ContentChunk]) -> str: + return self._purpose_sentence_for_chunks( + [ + chunk + for chunk in self._purpose_chunks(chunks) + if chunk.kind == "documentation" + and chunk.metadata.get("source_role") != "derived_scope" + and not chunk.path.lower().endswith("scope.md") + ] + ) + + def _purpose_sentence_for_chunks(self, chunks: list[ContentChunk]) -> str: + for chunk in chunks: if chunk.kind not in {"intent", "documentation"}: continue lines = [line.strip() for line in chunk.text.splitlines() if line.strip()] paragraph = next((line for line in lines if not line.startswith("#")), "") - if paragraph: + if paragraph and not self._is_template_boilerplate(paragraph): return paragraph return "" + def _scope_one_liner(self, chunks: list[ContentChunk]) -> str: + for chunk in sorted(chunks, key=lambda item: (item.path, item.start_line)): + if not ( + chunk.kind == "scope" + or chunk.metadata.get("source_role") == "derived_scope" + or chunk.path.lower().endswith("scope.md") + ): + continue + lines = chunk.text.splitlines() + for index, raw_line in enumerate(lines): + if raw_line.strip().lower() == "## one-liner": + for following in lines[index + 1 :]: + candidate = following.strip() + if not candidate or candidate.startswith("---"): + continue + if candidate.startswith(">"): + continue + return candidate.strip(" .") + before_first_section: list[str] = [] + for raw_line in lines: + candidate = raw_line.strip() + if candidate.startswith("## "): + break + before_first_section.append(candidate) + for candidate in before_first_section: + if ( + candidate + and not candidate.startswith("#") + and not candidate.startswith(">") + and not candidate.startswith("---") + and not self._is_template_boilerplate(candidate) + ): + return candidate.strip(" .") + return "" + + def _is_template_boilerplate(self, text: str) -> bool: + lowered = text.lower() + return ( + "git repository template to bootstrap" in lowered + or "this file helps you quickly understand" in lowered + or "intentionally lightweight and may be incomplete" in lowered + ) + def _purpose_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]: def priority(chunk: ContentChunk) -> tuple[int, str, int]: role = chunk.metadata.get("source_role") path = chunk.path.lower() if role == "intent_summary" or path.endswith("intent.md"): return (0, path, chunk.start_line) - if role == "product_documentation" or path.startswith("readme"): - return (1, path, chunk.start_line) if role == "derived_scope" or path.endswith("scope.md"): - return (3, path, chunk.start_line) - return (2, path, chunk.start_line) + return (1, path, chunk.start_line) + if role == "product_documentation" or path.startswith("readme"): + return (2, path, chunk.start_line) + return (3, path, chunk.start_line) return sorted( [ chunk for chunk in chunks - if chunk.kind in {"intent", "documentation"} + if chunk.kind in {"intent", "documentation", "scope"} and chunk.metadata.get("source_role") != "agent_guidance" ], key=priority, @@ -1284,9 +1628,11 @@ class CandidateGraphGenerator: if not words: return "" words[0] = self._imperative_verb(words[0]) - return self._title_from_words(words[:8]) + return self._title_from_words(words[:10]) def _imperative_verb(self, word: str) -> str: + if word.isupper(): + return word lower = word.lower().strip(",;:") irregular = { "does": "do", @@ -1313,7 +1659,7 @@ class CandidateGraphGenerator: for word in words ] return " ".join( - word[:1].upper() + word[1:] + word if word.isupper() else word[:1].upper() + word[1:] for word in cleaned_words if word ) @@ -1341,17 +1687,37 @@ class CandidateGraphGenerator: lines = [line.strip() for line in chunk.text.splitlines() if line.strip()] if not lines: continue + if chunk.kind == "scope" or chunk.metadata.get("source_role") == "derived_scope": + one_liner = self._scope_one_liner([chunk]) + if one_liner: + return f"SCOPE. {one_liner}" heading = next((line.lstrip("#").strip() for line in lines if line.startswith("#")), "") paragraph = next((line for line in lines if not line.startswith("#")), "") + if self._is_template_boilerplate(paragraph): + paragraph = "" if heading and paragraph: return f"{heading}. {paragraph}" return heading or paragraph return "" def _documentation_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]: + def priority(chunk: ContentChunk) -> tuple[int, str, int]: + role = chunk.metadata.get("source_role") + path = chunk.path.lower() + if chunk.kind == "intent" or role == "intent_summary" or path.endswith("intent.md"): + return (0, path, chunk.start_line) + if chunk.kind == "scope" or role == "derived_scope" or path.endswith("scope.md"): + return (1, path, chunk.start_line) + return (2, path, chunk.start_line) + return sorted( - [chunk for chunk in chunks if chunk.kind in {"intent", "documentation"}], - key=lambda chunk: (0 if chunk.kind == "intent" else 1, chunk.path, chunk.start_line), + [ + chunk + for chunk in chunks + if chunk.kind in {"intent", "documentation", "scope"} + and chunk.metadata.get("source_role") != "agent_guidance" + ], + key=priority, ) def _interface_summary(self, chunks: list[ContentChunk]) -> str: diff --git a/src/repo_scoping/cli.py b/src/repo_scoping/cli.py index 28b58cb..1271584 100644 --- a/src/repo_scoping/cli.py +++ b/src/repo_scoping/cli.py @@ -186,6 +186,19 @@ def build_parser() -> argparse.ArgumentParser: default="markdown", help="Inventory output format.", ) + dataset = subparsers.add_parser( + "assess-dataset", + help="Summarize repository generation coverage across the local dataset.", + ) + dataset.add_argument("--database-path", help="Override REPO_SCOPING_DATABASE_PATH.") + dataset.add_argument("--checkout-root", help="Override REPO_SCOPING_CHECKOUT_ROOT.") + dataset.add_argument("--output", help="Write dataset assessment to this path instead of stdout.") + dataset.add_argument( + "--format", + choices=["json", "markdown"], + default="markdown", + help="Dataset assessment output format.", + ) return parser @@ -204,6 +217,8 @@ def main(argv: Sequence[str] | None = None) -> int: return list_quality_criteria_command(args) if args.command == "list-legacy-auto-approvals": return list_legacy_auto_approvals_command(args) + if args.command == "assess-dataset": + return assess_dataset_command(args) parser.error(f"unknown command: {args.command}") return 2 @@ -285,6 +300,235 @@ def list_legacy_auto_approvals_command(args: argparse.Namespace) -> int: return 0 +def assess_dataset_command(args: argparse.Namespace) -> int: + service = service_from_args(args) + report = dataset_assessment(service) + content = ( + json.dumps(report, indent=2) + "\n" + if args.format == "json" + else dataset_assessment_markdown(report) + ) + if args.output: + write_text(args.output, content) + else: + print(content, end="" if content.endswith("\n") else "\n") + return 0 + + +def dataset_assessment(service: RegistryService) -> dict[str, object]: + repositories = [] + totals = { + "repositories": 0, + "facts": 0, + "content_chunks": 0, + "candidate_abilities": 0, + "candidate_capabilities": 0, + "candidate_features": 0, + "candidate_evidence": 0, + "approved_abilities": 0, + "approved_capabilities": 0, + "approved_features": 0, + "approved_evidence": 0, + "dependency_graph_nodes": 0, + "dependency_graph_edges": 0, + } + for repository in service.list_repositories(): + runs = service.list_analysis_runs(repository.id) + latest_run = next((run for run in reversed(runs) if run.status == "completed"), None) + facts = service.list_observed_facts(repository.id, latest_run.id) if latest_run else [] + chunks = service.list_content_chunks(repository.id, latest_run.id) if latest_run else [] + candidate_counts = { + "abilities": 0, + "capabilities": 0, + "features": 0, + "evidence": 0, + } + candidate_names: list[str] = [] + if latest_run is not None: + try: + graph = service.candidate_graph(repository.id, latest_run.id) + except NotFoundError: + graph = None + if graph is not None: + candidate_counts = candidate_graph_counts(graph) + candidate_names = [ + ability.name + for ability in graph.abilities + ][:5] + ability_map = service.ability_map(repository.id) + approved_counts = approved_graph_counts(ability_map) + graph_metrics = {"node_count": 0, "edge_count": 0} + try: + dependency_graph = service.dependency_graph_elements(repository.id) + graph_metrics = { + "node_count": int(dependency_graph["metrics"]["node_count"]), + "edge_count": int(dependency_graph["metrics"]["edge_count"]), + } + except (NotFoundError, ValueError): + pass + snapshot = ( + service.store.get_snapshot(latest_run.snapshot_id) + if latest_run is not None and latest_run.snapshot_id is not None + else None + ) + doc_presence = document_presence(snapshot.source_path if snapshot else "") + issues = dataset_assessment_issues( + fact_count=len(facts), + chunk_count=len(chunks), + candidate_counts=candidate_counts, + approved_counts=approved_counts, + graph_metrics=graph_metrics, + doc_presence=doc_presence, + candidate_names=candidate_names, + ) + repositories.append( + { + "repository_id": repository.id, + "name": repository.name, + "status": repository.status, + "latest_analysis_run_id": latest_run.id if latest_run else None, + "latest_analysis_run_status": latest_run.status if latest_run else None, + "facts": len(facts), + "content_chunks": len(chunks), + "candidate_counts": candidate_counts, + "approved_counts": approved_counts, + "dependency_graph": graph_metrics, + "documents": doc_presence, + "candidate_ability_names": candidate_names, + "issues": issues, + } + ) + totals["repositories"] += 1 + totals["facts"] += len(facts) + totals["content_chunks"] += len(chunks) + totals["candidate_abilities"] += candidate_counts["abilities"] + totals["candidate_capabilities"] += candidate_counts["capabilities"] + totals["candidate_features"] += candidate_counts["features"] + totals["candidate_evidence"] += candidate_counts["evidence"] + totals["approved_abilities"] += approved_counts["abilities"] + totals["approved_capabilities"] += approved_counts["capabilities"] + totals["approved_features"] += approved_counts["features"] + totals["approved_evidence"] += approved_counts["evidence"] + totals["dependency_graph_nodes"] += graph_metrics["node_count"] + totals["dependency_graph_edges"] += graph_metrics["edge_count"] + return { + "schema_version": "repo-scoping-dataset-assessment/v1", + "summary": totals, + "repositories": repositories, + } + + +def candidate_graph_counts(graph) -> dict[str, int]: + capabilities = [ + capability + for ability in graph.abilities + for capability in ability.capabilities + ] + return { + "abilities": len(graph.abilities), + "capabilities": len(capabilities), + "features": sum(len(capability.features) for capability in capabilities), + "evidence": sum(len(capability.evidence) for capability in capabilities), + } + + +def approved_graph_counts(ability_map) -> dict[str, int]: + capabilities = [ + capability + for ability in ability_map.abilities + for capability in ability.capabilities + ] + return { + "scope": 1 if ability_map.scope else 0, + "abilities": len(ability_map.abilities), + "capabilities": len(capabilities), + "features": sum(len(capability.features) for capability in capabilities), + "evidence": sum(len(capability.evidence) for capability in capabilities), + } + + +def document_presence(source_path: str) -> dict[str, bool]: + if not source_path: + return { + "INTENT.md": False, + "SCOPE.md": False, + "README": False, + "CLAUDE.md": False, + "AGENTS.md": False, + } + root = Path(source_path) + return { + "INTENT.md": (root / "INTENT.md").is_file(), + "SCOPE.md": (root / "SCOPE.md").is_file(), + "README": any(root.glob("README*")), + "CLAUDE.md": (root / "CLAUDE.md").is_file(), + "AGENTS.md": (root / "AGENTS.md").is_file(), + } + + +def dataset_assessment_issues( + *, + fact_count: int, + chunk_count: int, + candidate_counts: dict[str, int], + approved_counts: dict[str, int], + graph_metrics: dict[str, int], + doc_presence: dict[str, bool], + candidate_names: list[str], +) -> list[str]: + issues: list[str] = [] + if fact_count and not candidate_counts["capabilities"]: + issues.append("facts-without-candidate-capabilities") + if chunk_count and doc_presence.get("SCOPE.md") and not candidate_counts["capabilities"]: + issues.append("scope-text-unused-for-lower-hierarchy") + if fact_count and not graph_metrics["node_count"]: + issues.append("facts-with-empty-dependency-graph") + if approved_counts["abilities"] == 0 and graph_metrics["node_count"] == 0: + issues.append("approved-hierarchy-missing-and-no-graph-fallback") + if any("repo-seed" in name.lower() for name in candidate_names): + issues.append("template-readme-contamination") + return issues + + +def dataset_assessment_markdown(report: dict[str, object]) -> str: + lines = ["# Repo-Scoping Dataset Assessment", ""] + summary = report["summary"] + lines.extend( + [ + f"- Repositories: {summary['repositories']}", + f"- Facts: {summary['facts']}", + f"- Candidate hierarchy: {summary['candidate_abilities']} abilities / " + f"{summary['candidate_capabilities']} capabilities / " + f"{summary['candidate_features']} features / " + f"{summary['candidate_evidence']} evidence", + f"- Approved hierarchy: {summary['approved_abilities']} abilities / " + f"{summary['approved_capabilities']} capabilities / " + f"{summary['approved_features']} features / " + f"{summary['approved_evidence']} evidence", + f"- Dependency graph: {summary['dependency_graph_nodes']} nodes / " + f"{summary['dependency_graph_edges']} edges", + "", + "| Repo | Run | Facts | Chunks | Candidate | Approved | Graph | Issues |", + "| --- | ---: | ---: | ---: | --- | --- | --- | --- |", + ] + ) + for item in report["repositories"]: + candidate = item["candidate_counts"] + approved = item["approved_counts"] + graph = item["dependency_graph"] + lines.append( + f"| {item['name']} | {item['latest_analysis_run_id'] or '-'} | " + f"{item['facts']} | {item['content_chunks']} | " + f"{candidate['abilities']}/{candidate['capabilities']}/" + f"{candidate['features']}/{candidate['evidence']} | " + f"{approved['abilities']}/{approved['capabilities']}/" + f"{approved['features']}/{approved['evidence']} | " + f"{graph['node_count']}/{graph['edge_count']} | " + f"{', '.join(item['issues']) or '-'} |" + ) + return "\n".join(lines) + "\n" + + def legacy_auto_approval_records_markdown(records) -> str: if not records: return "No legacy trusted auto-approval records found.\n" diff --git a/src/repo_scoping/core/service.py b/src/repo_scoping/core/service.py index af6880d..10dfb5e 100644 --- a/src/repo_scoping/core/service.py +++ b/src/repo_scoping/core/service.py @@ -2,6 +2,7 @@ from __future__ import annotations from collections.abc import Sequence from dataclasses import asdict, replace +from pathlib import Path from typing import Any from repo_scoping.acceptance import ( @@ -1492,6 +1493,22 @@ class RegistryService: ability_map = self.store.get_ability_map(repository_id) facts_by_id = {fact.id: fact for fact in self.store.list_observed_facts(repository_id)} characteristic_index = self._dependency_characteristic_index(ability_map) + latest_candidate_graph = None + include_candidates = not ability_map.abilities + if include_candidates: + latest_run = self._latest_completed_run(repository_id) + latest_candidate_graph = ( + self._candidate_graph_or_none(repository_id, latest_run.id) + if latest_run is not None + else None + ) + if latest_candidate_graph is not None: + characteristic_index.update( + self._candidate_dependency_characteristic_index( + latest_candidate_graph, + ability_map, + ) + ) nodes: dict[str, dict[str, object]] = {} edge_sources: dict[str, DependencyEdge] = {} @@ -1513,6 +1530,16 @@ class RegistryService: if (display_edge := self._dependency_display_edge(edge, facts_by_id)) is not None ] + if latest_candidate_graph is not None: + graph_edges.extend( + display_edge + for edge in self._candidate_dependency_edges( + latest_candidate_graph, + ability_map, + ) + if (display_edge := self._dependency_display_edge(edge, facts_by_id)) + is not None + ) def ensure_node(kind: str, key: str, item_id: int | None) -> None: if key in nodes: @@ -1555,7 +1582,7 @@ class RegistryService: "layer": self._dependency_layer(kind), "label": detail.get("label") or self._dependency_node_label(repository_id, kind, key, item_id), - "reviewState": "accepted", + "reviewState": detail.get("reviewState", "accepted"), "name": detail.get("name") or self._dependency_node_label(repository_id, kind, key, item_id), "description": detail.get("description", ""), @@ -1563,7 +1590,7 @@ class RegistryService: "attributes": detail.get("attributes", []), "confidence": detail.get("confidence"), "visualSize": self._dependency_node_size(detail.get("confidence")), - "ownership": self._ownership_for_kind(kind), + "ownership": detail.get("ownership", self._ownership_for_kind(kind)), "freshnessState": ( impact_item.freshness_state if impact_item is not None @@ -1587,6 +1614,7 @@ class RegistryService: class_name for class_name in ( kind, + str(detail.get("reviewState", "accepted")), "stale" if impact_item is not None else "current", "changed" if is_changed_fact else "", ) @@ -2627,6 +2655,336 @@ class RegistryService: ) return self.store.get_ability_map(repository_id) + def document_review( + self, + repository_id: int, + document_name: str, + ) -> dict[str, object]: + normalized = document_name.upper() + if normalized not in {"INTENT.MD", "SCOPE.MD"}: + raise ValueError("document_name must be INTENT.md or SCOPE.md") + repository = self.store.get_repository(repository_id) + latest_run = self._latest_completed_run(repository_id) + facts = ( + self.store.list_observed_facts(repository_id, latest_run.id) + if latest_run is not None + else [] + ) + chunks = ( + self.store.list_content_chunks(repository_id, latest_run.id) + if latest_run is not None + else [] + ) + ability_map = self.store.get_ability_map(repository_id) + candidate_graph = ( + self._candidate_graph_or_none(repository_id, latest_run.id) + if latest_run is not None + else None + ) + snapshot = ( + self.store.get_snapshot(latest_run.snapshot_id) + if latest_run is not None and latest_run.snapshot_id is not None + else None + ) + source_root = Path(snapshot.source_path) if snapshot is not None else None + filename = "INTENT.md" if normalized == "INTENT.MD" else "SCOPE.md" + current_path = source_root / filename if source_root is not None else None + current_content = "" + if current_path is not None and current_path.is_file(): + current_content = current_path.read_text(encoding="utf-8", errors="ignore") + draft_content = ( + self._draft_intent_document(repository, ability_map, candidate_graph, facts, chunks) + if normalized == "INTENT.MD" + else self._draft_scope_document(repository, ability_map, candidate_graph, facts, chunks) + ) + return { + "repository": asdict(repository), + "document": filename, + "path": str(current_path) if current_path is not None else "", + "exists": bool(current_content), + "current_content": current_content, + "draft_content": draft_content, + "draft_kind": "ambitious-intent" if normalized == "INTENT.MD" else "current-scope", + "write_policy": ( + "review-only; repo-scoping does not write INTENT.md automatically" + if normalized == "INTENT.MD" + else "review-only from this endpoint; use the explicit scope write endpoint to write" + ), + "provenance": self._document_review_provenance( + latest_run.id if latest_run is not None else None, + facts, + chunks, + candidate_graph, + ), + } + + def _latest_completed_run(self, repository_id: int) -> AnalysisRun | None: + completed = [ + run + for run in self.store.list_analysis_runs(repository_id) + if run.status == "completed" + ] + return completed[-1] if completed else None + + def _candidate_graph_or_none( + self, + repository_id: int, + analysis_run_id: int, + ) -> CandidateGraph | None: + try: + return self.store.get_candidate_graph(repository_id, analysis_run_id) + except NotFoundError: + return None + + def _draft_intent_document( + self, + repository: Repository, + ability_map: RepositoryAbilityMap, + candidate_graph: CandidateGraph | None, + facts: list[ObservedFact], + chunks: list[ContentChunk], + ) -> str: + one_liner = ( + self._scope_one_liner_from_chunks(chunks) + or ability_map.scope.description + or repository.description + or f"{repository.name} should provide clearly reviewable repository utility." + ) + capabilities = self._draft_capability_names(ability_map, candidate_graph) + boundaries = self._scope_section_items(chunks, "Not Relevant When") + related = self._scope_section_items(chunks, "Related / Overlapping") + lines = [ + "# INTENT", + "", + "> Draft generated by repo-scoping for review.", + "> This is ambitious design intent derived from current scope, facts, and candidates.", + "> It is not written automatically.", + "", + "## Purpose", + "", + self._ambitious_intent_sentence(one_liner), + "", + "## Intended Capabilities", + "", + ] + if capabilities: + lines.extend(f"- {name}" for name in capabilities) + else: + lines.append("- ") + lines.extend(["", "## Success Criteria", ""]) + lines.extend( + [ + "- The repository's useful behavior can be explained from source-linked evidence.", + "- Candidate capabilities can be reviewed without relying on template boilerplate.", + "- Scope and intent remain separate: current behavior informs but does not define ambition.", + ] + ) + lines.extend(["", "## Boundaries", ""]) + if boundaries: + lines.extend(boundaries) + else: + lines.append("- ") + if related: + lines.extend(["", "## Related Repositories", ""]) + lines.extend(related) + return "\n".join(lines).rstrip() + "\n" + + def _draft_scope_document( + self, + repository: Repository, + ability_map: RepositoryAbilityMap, + candidate_graph: CandidateGraph | None, + facts: list[ObservedFact], + chunks: list[ContentChunk], + ) -> str: + one_liner = ( + self._scope_one_liner_from_chunks(chunks) + or ability_map.scope.description + or repository.description + or f"{repository.name} has observed repository behavior under review." + ) + capabilities = self._draft_capability_names(ability_map, candidate_graph) + relevant = self._scope_section_items(chunks, "Relevant When") + not_relevant = self._scope_section_items(chunks, "Not Relevant When") + paths = sorted({fact.path for fact in facts if fact.path})[:8] + lines = [ + "# SCOPE", + "", + "> Draft generated by repo-scoping for review.", + "> This describes current understood behavior and should be edited before writing.", + "", + "---", + "", + "## One-liner", + "", + one_liner, + "", + "## Core Idea", + "", + self._scope_core_idea_from_chunks(chunks) or one_liner, + "", + "## Relevant When", + "", + ] + lines.extend(relevant or ["- "]) + lines.extend(["", "## Not Relevant When", ""]) + lines.extend(not_relevant or ["- "]) + lines.extend(["", "## Current State", ""]) + lines.extend( + [ + f"- Repository status: {repository.status}", + f"- Facts observed: {len(facts)}", + f"- Candidate capabilities: {len(capabilities)}", + ] + ) + lines.extend(["", "## Getting Oriented", ""]) + if paths: + lines.extend( + [ + f"- Start with: {paths[0]}", + f"- Key files / directories: {', '.join(paths)}", + ] + ) + else: + lines.append("- ") + lines.extend(["", "## Provided Capabilities", ""]) + if capabilities: + for name in capabilities: + lines.extend( + [ + "```capability", + "type: draft", + f"title: {name}", + "description: Review this candidate capability before treating it as scope truth.", + "keywords: [draft, review-required]", + "```", + "", + ] + ) + else: + lines.append("") + return "\n".join(lines).rstrip() + "\n" + + def _document_review_provenance( + self, + analysis_run_id: int | None, + facts: list[ObservedFact], + chunks: list[ContentChunk], + candidate_graph: CandidateGraph | None, + ) -> dict[str, object]: + return { + "analysis_run_id": analysis_run_id, + "fact_count": len(facts), + "content_chunk_count": len(chunks), + "candidate_counts": ( + { + "abilities": len(candidate_graph.abilities), + "capabilities": sum( + len(ability.capabilities) for ability in candidate_graph.abilities + ), + "features": sum( + len(capability.features) + for ability in candidate_graph.abilities + for capability in ability.capabilities + ), + "evidence": sum( + len(capability.evidence) + for ability in candidate_graph.abilities + for capability in ability.capabilities + ), + } + if candidate_graph is not None + else { + "abilities": 0, + "capabilities": 0, + "features": 0, + "evidence": 0, + } + ), + "source_paths": sorted({fact.path for fact in facts if fact.path})[:12], + } + + def _draft_capability_names( + self, + ability_map: RepositoryAbilityMap, + candidate_graph: CandidateGraph | None, + ) -> list[str]: + approved = [ + capability.name + for ability in ability_map.abilities + for capability in ability.capabilities + ] + if approved: + return approved[:12] + if candidate_graph is None: + return [] + names = [ + capability.name + for ability in candidate_graph.abilities + for capability in ability.capabilities + if capability.status == "candidate" + ] + if names: + return names[:12] + return [ability.name for ability in candidate_graph.abilities[:3]] + + def _scope_one_liner_from_chunks(self, chunks: list[ContentChunk]) -> str: + for chunk in self._scope_chunks(chunks): + lines = chunk.text.splitlines() + for index, raw_line in enumerate(lines): + if raw_line.strip().lower() == "## one-liner": + for following in lines[index + 1 :]: + candidate = following.strip() + if candidate and not candidate.startswith(("---", ">")): + return candidate.strip(" .") + return "" + + def _scope_core_idea_from_chunks(self, chunks: list[ContentChunk]) -> str: + items = self._scope_section_items(chunks, "Core Idea") + return "\n".join(items) if items else "" + + def _scope_section_items( + self, + chunks: list[ContentChunk], + section_name: str, + ) -> list[str]: + wanted = section_name.lower() + items: list[str] = [] + in_section = False + for chunk in self._scope_chunks(chunks): + for raw_line in chunk.text.splitlines(): + line = raw_line.strip() + if line.startswith("## "): + in_section = line.lstrip("#").strip().lower() == wanted + continue + if not in_section or not line or line.startswith("---"): + continue + if line.startswith("```"): + continue + if line.startswith("- "): + items.append(line) + elif not line.startswith("#"): + items.append(line) + return items[:10] + + def _scope_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]: + return sorted( + [ + chunk + for chunk in chunks + if chunk.kind == "scope" + or chunk.metadata.get("source_role") == "derived_scope" + or chunk.path.lower().endswith("scope.md") + ], + key=lambda chunk: (chunk.path, chunk.start_line), + ) + + def _ambitious_intent_sentence(self, current_scope: str) -> str: + cleaned = current_scope.strip().rstrip(".") + if not cleaned: + return "Provide a source-linked, reviewable repository capability." + return f"Provide a deliberate, reviewable implementation of: {cleaned}." + def ability_map(self, repository_id: int) -> RepositoryAbilityMap: return self.store.get_ability_map(repository_id) @@ -2965,6 +3323,74 @@ class RegistryService: "sourceReferences": [ asdict(source_ref) for source_ref in evidence.source_refs ], + } + return index + + def _candidate_dependency_characteristic_index( + self, + candidate_graph: CandidateGraph, + ability_map: RepositoryAbilityMap, + ) -> dict[str, dict[str, object]]: + index: dict[str, dict[str, object]] = { + self._candidate_dependency_key("scope", ability_map.scope.id): { + "name": ability_map.scope.name, + "description": ability_map.scope.description, + "primaryClass": "draft-scope", + "attributes": ["draft", "scope", "candidate-derived"], + "confidence": ability_map.scope.confidence, + "reviewState": "draft", + "ownership": "curator_owned", + "sourceReferences": [], + } + } + for ability in candidate_graph.abilities: + index[self._candidate_dependency_key("ability", ability.id)] = { + "name": ability.name, + "description": ability.description, + "primaryClass": ability.primary_class, + "attributes": ability.attributes, + "confidence": ability.confidence, + "reviewState": ability.status, + "ownership": "mixed", + "sourceReferences": [asdict(ref) for ref in ability.source_refs], + } + for capability in ability.capabilities: + index[self._candidate_dependency_key("capability", capability.id)] = { + "name": capability.name, + "description": capability.description, + "primaryClass": capability.primary_class, + "attributes": capability.attributes, + "confidence": capability.confidence, + "reviewState": capability.status, + "ownership": "mixed", + "sourceReferences": [asdict(ref) for ref in capability.source_refs], + } + for feature in capability.features: + index[self._candidate_dependency_key("feature", feature.id)] = { + "name": feature.name, + "description": feature.location, + "primaryClass": feature.primary_class or feature.type, + "attributes": feature.attributes, + "confidence": feature.confidence, + "path": feature.location, + "reviewState": feature.status, + "ownership": "mixed", + "sourceReferences": [ + asdict(source_ref) for source_ref in feature.source_refs + ], + } + for evidence in capability.evidence: + index[self._candidate_dependency_key("evidence", evidence.id)] = { + "name": evidence.reference, + "description": evidence.type, + "primaryClass": evidence.type, + "attributes": [evidence.type, evidence.strength], + "confidence": self._evidence_confidence(evidence.strength), + "reviewState": evidence.status, + "ownership": "mixed", + "sourceReferences": [ + asdict(source_ref) for source_ref in evidence.source_refs + ], } return index @@ -3223,6 +3649,134 @@ class RegistryService: ) return edges + def _candidate_dependency_edges( + self, + candidate_graph: CandidateGraph, + ability_map: RepositoryAbilityMap, + ) -> list[DependencyEdge]: + edges: list[DependencyEdge] = [] + scope_key = self._candidate_dependency_key("scope", ability_map.scope.id) + for ability in candidate_graph.abilities: + ability_key = self._candidate_dependency_key("ability", ability.id) + edges.append( + self._dependency_edge( + source_kind="ability", + source_id=ability.id, + source_key=ability_key, + target_kind="scope", + target_id=ability_map.scope.id, + target_key=scope_key, + dependency_type="draft-summarizes", + strength="medium", + source="candidate_graph", + ) + ) + for source_ref in ability.source_refs: + edges.append( + self._dependency_edge( + source_kind="fact", + source_id=source_ref.fact_id, + source_key=self._source_ref_fact_key(source_ref), + target_kind="ability", + target_id=ability.id, + target_key=ability_key, + dependency_type="observes-draft", + strength="medium", + source="candidate_source_ref", + ) + ) + for capability in ability.capabilities: + capability_key = self._candidate_dependency_key( + "capability", + capability.id, + ) + edges.append( + self._dependency_edge( + source_kind="capability", + source_id=capability.id, + source_key=capability_key, + target_kind="ability", + target_id=ability.id, + target_key=ability_key, + dependency_type="draft-realizes", + strength="medium", + source="candidate_graph", + ) + ) + for source_ref in capability.source_refs: + edges.append( + self._dependency_edge( + source_kind="fact", + source_id=source_ref.fact_id, + source_key=self._source_ref_fact_key(source_ref), + target_kind="capability", + target_id=capability.id, + target_key=capability_key, + dependency_type="observes-draft", + strength="medium", + source="candidate_source_ref", + ) + ) + for feature in capability.features: + feature_key = self._candidate_dependency_key("feature", feature.id) + edges.append( + self._dependency_edge( + source_kind="feature", + source_id=feature.id, + source_key=feature_key, + target_kind="capability", + target_id=capability.id, + target_key=capability_key, + dependency_type="draft-supports", + strength="medium", + source="candidate_graph", + ) + ) + for source_ref in feature.source_refs: + edges.append( + self._dependency_edge( + source_kind="fact", + source_id=source_ref.fact_id, + source_key=self._source_ref_fact_key(source_ref), + target_kind="feature", + target_id=feature.id, + target_key=feature_key, + dependency_type="observes-draft", + strength="medium", + source="candidate_source_ref", + ) + ) + for evidence in capability.evidence: + evidence_key = self._candidate_dependency_key("evidence", evidence.id) + edges.append( + self._dependency_edge( + source_kind="evidence", + source_id=evidence.id, + source_key=evidence_key, + target_kind="capability", + target_id=capability.id, + target_key=capability_key, + dependency_type="draft-supports", + strength=evidence.strength or "medium", + source="candidate_graph", + ) + ) + for source_ref in evidence.source_refs: + edges.append( + self._dependency_edge( + source_kind="fact", + source_id=source_ref.fact_id, + source_key=self._source_ref_fact_key(source_ref), + target_kind="evidence", + target_id=evidence.id, + target_key=evidence_key, + dependency_type="observes-draft", + strength=evidence.strength or "medium", + source="candidate_source_ref", + ) + ) + return edges + def _dependency_edge( self, *, @@ -3253,6 +3807,9 @@ class RegistryService: def _dependency_key(self, kind: str, item_id: int) -> str: return f"{kind}:{item_id}" + def _candidate_dependency_key(self, kind: str, item_id: int) -> str: + return f"candidate:{kind}:{item_id}" if kind != "scope" else f"draft:scope:{item_id}" + def _source_ref_fact_key(self, source_ref) -> str: return f"fact:{source_ref.kind}:{source_ref.path}:{source_ref.name}" diff --git a/src/repo_scoping/web_api/app.py b/src/repo_scoping/web_api/app.py index 1d51481..c1ec284 100644 --- a/src/repo_scoping/web_api/app.py +++ b/src/repo_scoping/web_api/app.py @@ -1212,6 +1212,38 @@ def get_ability_map( raise HTTPException(status_code=404, detail=str(exc)) from exc +@app.get( + "/repos/{repository_id}/intent/review", + tags=["scope"], +) +def review_repository_intent( + repository_id: int, + service: RegistryService = Depends(get_service), +) -> dict[str, object]: + try: + return service.document_review(repository_id, "INTENT.md") + except NotFoundError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + + +@app.get( + "/repos/{repository_id}/scope/review", + tags=["scope"], +) +def review_repository_scope( + repository_id: int, + service: RegistryService = Depends(get_service), +) -> dict[str, object]: + try: + return service.document_review(repository_id, "SCOPE.md") + except NotFoundError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + + @app.get( "/repos/{repository_id}/dependency-graph", tags=["visualization"], diff --git a/src/repo_scoping/web_ui/views.py b/src/repo_scoping/web_ui/views.py index ec348e2..abfd862 100644 --- a/src/repo_scoping/web_ui/views.py +++ b/src/repo_scoping/web_ui/views.py @@ -1104,6 +1104,61 @@ def repository_scope_document( ) +@router.get("/ui/repos/{repository_id}/intent-review") +def repository_intent_review( + repository_id: int, + service: RegistryService = Depends(get_service), +) -> HTMLResponse: + return repository_document_review_page(repository_id, "INTENT.md", service) + + +@router.get("/ui/repos/{repository_id}/scope-review") +def repository_scope_review( + repository_id: int, + service: RegistryService = Depends(get_service), +) -> HTMLResponse: + return repository_document_review_page(repository_id, "SCOPE.md", service) + + +def repository_document_review_page( + repository_id: int, + document_name: str, + service: RegistryService, +) -> HTMLResponse: + payload = service.document_review(repository_id, document_name) + repository = service.get_repository(repository_id) + display_name = repository_display_name(repository) + current = str(payload.get("current_content") or "") + draft = str(payload.get("draft_content") or "") + provenance = payload.get("provenance") or {} + body = f""" +
+

{escape(document_name)} Review

+ Repository +
+
+

{escape(str(payload.get("write_policy", "")))}

+

{'exists' if payload.get("exists") else 'missing'} + {escape(str(payload.get("path", "")))}

+ + +

analysis run {escape(str(provenance.get("analysis_run_id", "")))} · + {escape(str(provenance.get("fact_count", 0)))} facts · + {escape(str((provenance.get("candidate_counts") or {}).get("capabilities", 0)))} candidate capabilities

+
+ """ + return page( + f"{document_name} Review", + body, + selected_repository=display_name, + selected_repository_id=repository.id, + ) + + @router.get("/ui/discovery") def discovery_page(service: RegistryService = Depends(get_service)) -> HTMLResponse: repositories = service.list_repositories() @@ -1514,6 +1569,8 @@ def repository_detail( Dependency Graph Export SCOPE + Scope Draft + Intent Draft Back

{escape(repository.description or '')}

diff --git a/tests/test_candidate_graph.py b/tests/test_candidate_graph.py index 1558543..857b781 100644 --- a/tests/test_candidate_graph.py +++ b/tests/test_candidate_graph.py @@ -182,6 +182,115 @@ def test_candidate_generator_prefers_intent_over_derived_scope_for_ability_name( assert graph[0].name == "Provide A Provider-agnostic LLM Connector" +def test_candidate_generator_uses_scope_one_liner_over_template_readme(): + repository = Repository( + id=1, + name="ops-warden", + url="/tmp/ops-warden", + description=None, + branch="main", + status="analyzed", + ) + facts = [ + fact(1, "documentation", "README", "README.md"), + fact(2, "scope", "SCOPE", "SCOPE.md", metadata={"source_role": "derived_scope"}), + ] + chunks = [ + chunk( + 1, + "documentation", + "README.md", + "# repo-seed\nA git repository template to bootstrap coulomb projects from.", + end_line=2, + ), + chunk( + 2, + "scope", + "SCOPE.md", + "# SCOPE\n\n## One-liner\n" + "SSH Certificate Authority and credential issuance for the ops fleet.\n", + end_line=4, + ), + ] + chunks[1].metadata["source_role"] = "derived_scope" + + graph = CandidateGraphGenerator().generate(repository, facts, chunks) + + assert graph[0].name == "SSH Certificate Authority And Credential Issuance For The Ops Fleet" + assert "repo-seed" not in graph[0].description + + +def test_candidate_generator_extracts_current_capabilities_from_scope_blocks(): + repository = Repository( + id=1, + name="railiance-apps", + url="/tmp/railiance-apps", + description=None, + branch="main", + status="analyzed", + ) + facts = [ + fact(1, "scope", "SCOPE", "SCOPE.md", metadata={"source_role": "derived_scope"}), + ] + chunks = [ + chunk( + 1, + "scope", + "SCOPE.md", + "# SCOPE\n\n## One-liner\n" + "S5 Workloads and Experience layer of the Railiance OAS Stack.\n\n" + "## Provided Capabilities\n\n" + "```capability\n" + "type: infrastructure\n" + "title: Application workload deployment\n" + "description: Deploy and manage user-facing applications as Helm releases.\n" + "keywords: [gitea, helm, application]\n" + "```\n", + end_line=12, + ), + ] + chunks[0].metadata["source_role"] = "derived_scope" + + graph = CandidateGraphGenerator().generate(repository, facts, chunks) + + ability = graph[0] + assert ability.name == "S5 Workloads And Experience Layer Of The Railiance OAS Stack" + capability = ability.capabilities[0] + assert capability.name == "Application workload deployment" + assert capability.primary_class == "infrastructure" + assert {"scope-derived", "current-state", "review-required-scope"} <= set( + capability.attributes + ) + assert capability.features[0].name == "Application workload deployment" + assert capability.features[0].location == "SCOPE.md" + assert capability.evidence[0].reference == "SCOPE.md" + + +def test_candidate_generator_adds_fact_derived_capability_when_no_stronger_layers(): + repository = Repository( + id=1, + name="railiance-empty-layer", + url="/tmp/railiance-empty-layer", + description=None, + branch="main", + status="analyzed", + ) + facts = [ + fact(1, "config", "sops config", ".sops.yaml"), + fact(2, "manifest", "pyproject.toml", "pyproject.toml"), + ] + + graph = CandidateGraphGenerator().generate(repository, facts) + + capability = graph[0].capabilities[0] + assert capability.name == "Manage Repository Configuration" + assert capability.primary_class == "fact-derived" + assert {feature.type for feature in capability.features} == { + "configuration", + "manifest", + } + + def test_candidate_generator_enriches_descriptions_from_content_chunks(): repository = Repository( id=1, diff --git a/tests/test_cli.py b/tests/test_cli.py index 786a6d6..f0923b0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -216,6 +216,43 @@ def test_list_legacy_auto_approvals_cli_writes_json_inventory(tmp_path): assert records[0]["current_approved_ability_count"] == 1 +def test_assess_dataset_cli_reports_sparse_hierarchy_issues(tmp_path): + service = make_service(tmp_path) + source = tmp_path / "scope-only" + source.mkdir() + (source / "SCOPE.md").write_text( + "# SCOPE\n\n## One-liner\nScope-only current behavior.\n", + encoding="utf-8", + ) + repository = service.register_repository(name="Scope Only", url=str(source)) + service.analyze_repository(repository.id, use_llm_assistance=False) + output_path = tmp_path / "dataset.json" + + exit_code = main( + [ + "assess-dataset", + "--format", + "json", + "--output", + str(output_path), + "--database-path", + str(tmp_path / "registry.sqlite3"), + "--checkout-root", + str(tmp_path / "checkouts"), + ] + ) + + report = json.loads(output_path.read_text(encoding="utf-8")) + repo_report = report["repositories"][0] + assert exit_code == 0 + assert report["schema_version"] == "repo-scoping-dataset-assessment/v1" + assert repo_report["name"] == "Scope Only" + assert repo_report["documents"]["SCOPE.md"] is True + assert repo_report["candidate_counts"]["capabilities"] >= 1 + assert repo_report["dependency_graph"]["node_count"] > 0 + assert "facts-with-empty-dependency-graph" not in repo_report["issues"] + + def test_self_assess_cli_exports_challenger_and_comparison(tmp_path): source = write_repo(tmp_path) golden_path = tmp_path / "golden.json" diff --git a/tests/test_quality_criteria.py b/tests/test_quality_criteria.py index 403e82b..188a494 100644 --- a/tests/test_quality_criteria.py +++ b/tests/test_quality_criteria.py @@ -18,6 +18,8 @@ def test_quality_criteria_registry_is_versioned_and_reviewable(): "RREG-QC-004", "RREG-QC-005", "RREG-QC-006", + "RREG-QC-007", + "RREG-QC-008", } for criterion in registry.criteria: assert criterion.description diff --git a/tests/test_quality_gates.py b/tests/test_quality_gates.py index 637f305..7df1579 100644 --- a/tests/test_quality_gates.py +++ b/tests/test_quality_gates.py @@ -84,7 +84,67 @@ def test_quality_gates_flag_circular_scope_evidence(): outcomes = evaluate_candidate_capability_quality(capability) assert outcomes[0].criterion_id == "RREG-QC-005" - assert outcomes[0].outcome == "rejected" + assert outcomes[0].outcome == "requires_review" + + +def test_quality_gates_flag_scope_derived_candidates_for_review(): + capability = CandidateCapability( + id=12, + name="Application workload deployment", + description="Extracted from SCOPE.md.", + inputs=[], + outputs=[], + confidence=0.6, + status="candidate", + source_refs=[source_ref("SCOPE.md", "scope")], + confidence_label="medium", + primary_class="infrastructure", + attributes=["scope-derived", "review-required-scope"], + ) + + outcomes = evaluate_candidate_capability_quality(capability) + + outcome_ids = {outcome.criterion_id for outcome in outcomes} + assert {"RREG-QC-005"} <= outcome_ids + assert all(outcome.outcome == "requires_review" for outcome in outcomes) + + +def test_quality_gates_flag_template_contaminated_abilities(): + graph = CandidateGraph( + repository=Repository( + id=1, + name="Ops Warden", + url=".", + description=None, + branch="main", + status="analyzed", + ), + analysis_run=AnalysisRun( + id=1, + repository_id=1, + snapshot_id=None, + status="completed", + started_at="2026-05-15T00:00:00Z", + completed_at="2026-05-15T00:00:01Z", + error_message=None, + scanner_version="deterministic-v1", + ), + abilities=[ + CandidateAbility( + id=1, + name="A Git Repository Template To Bootstrap Coulomb Projects", + description="Derived from repo-seed README boilerplate.", + confidence=0.7, + status="candidate", + source_refs=[source_ref("README.md", "documentation")], + ) + ], + ) + + outcomes = evaluate_candidate_graph_quality(graph) + + assert outcomes[0].criterion_id == "RREG-QC-007" + assert outcomes[0].outcome == "downgraded" def test_quality_gate_outcomes_are_serializable_for_assessment_artifacts(): diff --git a/tests/test_registry_service.py b/tests/test_registry_service.py index a56b019..e72db22 100644 --- a/tests/test_registry_service.py +++ b/tests/test_registry_service.py @@ -498,6 +498,49 @@ def test_dependency_graph_deduplicates_document_fact_nodes(tmp_path): assert fact_nodes[0]["label"] == "README.md (documentation)" +def test_dependency_graph_renders_candidate_fallback_when_approved_hierarchy_missing(tmp_path): + service = make_service(tmp_path) + source = tmp_path / "scope-candidate" + source.mkdir() + (source / "SCOPE.md").write_text( + "# SCOPE\n\n" + "## One-liner\n" + "S5 Workloads and Experience layer.\n\n" + "## Provided Capabilities\n\n" + "```capability\n" + "type: infrastructure\n" + "title: Application workload deployment\n" + "description: Deploy applications as Helm releases.\n" + "keywords: [helm]\n" + "```\n", + encoding="utf-8", + ) + repository = service.register_repository(name="Scope Candidate", url=str(source)) + service.analyze_repository( + repository.id, + source_path=str(source), + use_llm_assistance=False, + ) + + payload = service.dependency_graph_elements(repository.id, use_latest_profile=False) + + nodes = [ + element["data"] + for element in payload["elements"] + if "source" not in element["data"] + ] + edges = [ + element["data"] + for element in payload["elements"] + if "source" in element["data"] + ] + assert payload["metrics"]["node_count"] > 0 + assert any(node["reviewState"] == "candidate" for node in nodes) + assert any(node["reviewState"] == "draft" for node in nodes) + assert any(edge["dependencyType"] == "draft-realizes" for edge in edges) + assert any(edge["dependencyType"] == "draft-supports" for edge in edges) + + def test_manual_registry_updates_and_deletes_approved_entries(tmp_path): service = make_service(tmp_path) repository = service.register_repository( diff --git a/tests/test_web_api.py b/tests/test_web_api.py index 42edd57..37a7f10 100644 --- a/tests/test_web_api.py +++ b/tests/test_web_api.py @@ -466,6 +466,12 @@ def test_openapi_contract_snapshot_for_stable_agent_paths(): "/repos/{repository_id}/export": { "get": {"tags": ["discovery"], "success_schema": "application/x-yaml"} }, + "/repos/{repository_id}/intent/review": { + "get": {"tags": ["scope"], "success_schema": "object"} + }, + "/repos/{repository_id}/scope/review": { + "get": {"tags": ["scope"], "success_schema": "object"} + }, "/repos/{repo_slug}/scope": { "get": {"tags": ["scope"], "success_schema": None} }, @@ -837,6 +843,62 @@ def test_api_generates_diffs_and_writes_scope_md(tmp_path): app.dependency_overrides.clear() +def test_api_reviews_intent_and_scope_drafts_without_writing_intent(tmp_path): + source = tmp_path / "draft-repo" + source.mkdir() + (source / "SCOPE.md").write_text( + "# SCOPE\n\n" + "## One-liner\n" + "S5 Workloads and Experience layer.\n\n" + "## Provided Capabilities\n\n" + "```capability\n" + "type: infrastructure\n" + "title: Application workload deployment\n" + "description: Deploy applications as Helm releases.\n" + "keywords: [helm]\n" + "```\n", + encoding="utf-8", + ) + + def override_settings(): + return Settings( + database_path=str(tmp_path / "draft-api.sqlite3"), + checkout_root=str(tmp_path / "checkouts"), + ) + + app.dependency_overrides[get_settings] = override_settings + client = TestClient(app) + try: + repository = client.post( + "/repos", + json={"name": "Draft Repo", "url": str(source)}, + ).json() + analysis = client.post( + f"/repos/{repository['id']}/analysis-runs", + json={"source_path": str(source), "use_llm_assistance": False}, + ).json() + assert analysis["analysis_run"]["status"] == "completed" + + intent_review = client.get(f"/repos/{repository['id']}/intent/review") + assert intent_review.status_code == 200 + intent_payload = intent_review.json() + assert intent_payload["document"] == "INTENT.md" + assert intent_payload["exists"] is False + assert "Application workload deployment" in intent_payload["draft_content"] + assert "does not write INTENT.md automatically" in intent_payload["write_policy"] + assert not (source / "INTENT.md").exists() + + scope_review = client.get(f"/repos/{repository['id']}/scope/review") + assert scope_review.status_code == 200 + scope_payload = scope_review.json() + assert scope_payload["exists"] is True + assert "S5 Workloads and Experience layer" in scope_payload["current_content"] + assert "Application workload deployment" in scope_payload["draft_content"] + assert scope_payload["provenance"]["candidate_counts"]["capabilities"] >= 1 + finally: + app.dependency_overrides.clear() + + def test_api_compare_gap_and_export_use_cases(tmp_path): def override_settings(): return Settings( @@ -1550,6 +1612,14 @@ def test_ui_register_analyze_and_approve_loop(tmp_path): f'Dependency Graph' in detail_response.text ) + assert ( + f'Scope Draft' + in detail_response.text + ) + assert ( + f'Intent Draft' + in detail_response.text + ) repo_scope_response = client.get(f"/ui/repos/{repository_id}/scope") assert repo_scope_response.status_code == 200 @@ -1600,7 +1670,9 @@ def test_ui_register_analyze_and_approve_loop(tmp_path): assert "Content Chunks" in run_detail.text assert "README.md:1-2" in run_detail.text assert "ID " in run_detail.text - assert "No review decisions yet." in run_detail.text + assert "quality_gate_evaluation" in run_detail.text + assert "requires_review:" in run_detail.text + assert "without approving registry truth" in run_detail.text assert "Expectation Gaps" in run_detail.text assert "Record Gap" in run_detail.text @@ -1674,7 +1746,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path): assert "Discovery" in approved_detail.text assert "Export" in approved_detail.text assert "Elements" in approved_detail.text - assert "q=Report+Service+Status" in approved_detail.text + assert "q=UI+Repo+Owns+The+Status+Reporting+Scope" in approved_detail.text graph_response = client.get(f"/repos/{repository_id}/dependency-graph") assert graph_response.status_code == 200 @@ -1787,7 +1859,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path): f"/ui/repos/{repository_id}/elements?scope=facts&analysis_run_id={first_run_id}&type=facts" in approved_detail.text ) - assert "Report Service Status Through API And CLI Entry" in approved_detail.text + assert "UI Repo Owns The Status Reporting Scope" in approved_detail.text assert "Language: Python" in approved_detail.text assert "Framework: FastAPI" in approved_detail.text assert "interface:app.py:3" in approved_detail.text @@ -1801,7 +1873,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path): assert "Registry Capabilities" in approved_listing.text assert "Entry" in approved_listing.text assert "Approved only" in approved_listing.text - assert "Expose Repository Interface" in approved_listing.text + assert "UI Repo Owns The Status Reporting Scope" in approved_listing.text assert "Save" in approved_listing.text assert "Delete" in approved_listing.text @@ -1964,14 +2036,14 @@ def test_ui_register_analyze_and_approve_loop(tmp_path): filtered_search_response = client.get( "/ui/search", - params={ - "q": "repository", - "status": "indexed", - "language": "Python", - "ability": "Report Service Status", - "capability": "Repository", - }, - ) + params={ + "q": "repository", + "status": "indexed", + "language": "Python", + "ability": "UI Repo", + "capability": "Scope", + }, + ) assert filtered_search_response.status_code == 200 assert "UI Repo" in filtered_search_response.text diff --git a/workplans/RREG-WP-0018-agentic-hierarchy-and-intent-scope-review.md b/workplans/RREG-WP-0018-agentic-hierarchy-and-intent-scope-review.md index c556d20..4263bca 100644 --- a/workplans/RREG-WP-0018-agentic-hierarchy-and-intent-scope-review.md +++ b/workplans/RREG-WP-0018-agentic-hierarchy-and-intent-scope-review.md @@ -31,7 +31,7 @@ abilities, and draft scope from facts, source-linked text, and existing ## Dataset Assessment -The current `var/repo-scoping.sqlite3` dataset contains eight repositories. The +The initial `var/repo-scoping.sqlite3` dataset contained eight repositories. The new non-repo-scoping repositories all completed analysis, but only `ops-warden` produced a candidate capability and feature. Railiance repos mostly produced one candidate ability, zero candidate capabilities, zero candidate @@ -59,7 +59,7 @@ Observed patterns: ```task id: RREG-WP-0018-T01 -status: todo +status: done priority: high state_hub_task_id: "dd00a642-7c69-4ae2-b7ac-954c31a1c72a" ``` @@ -80,7 +80,7 @@ Acceptance criteria: ```task id: RREG-WP-0018-T02 -status: todo +status: done priority: high state_hub_task_id: "01eb03da-7a0e-4e22-ae2d-7596752d178e" ``` @@ -106,7 +106,7 @@ Acceptance criteria: ```task id: RREG-WP-0018-T03 -status: todo +status: done priority: high state_hub_task_id: "fd572f4d-d2f6-4c85-bbf5-f77829fd6e6a" ``` @@ -129,7 +129,7 @@ Acceptance criteria: ```task id: RREG-WP-0018-T04 -status: todo +status: done priority: high state_hub_task_id: "286d96e0-ec5a-4a55-bb50-62d20ab25830" ``` @@ -152,7 +152,7 @@ Acceptance criteria: ```task id: RREG-WP-0018-T05 -status: todo +status: done priority: high state_hub_task_id: "80bc671c-2361-47e5-8135-7c945de66437" ``` @@ -175,7 +175,7 @@ Acceptance criteria: ```task id: RREG-WP-0018-T06 -status: todo +status: done priority: medium state_hub_task_id: "4b74a058-b759-42d2-a243-7134dd907093" ``` @@ -197,7 +197,7 @@ Acceptance criteria: ```task id: RREG-WP-0018-T07 -status: todo +status: in_progress priority: medium state_hub_task_id: "cd1a3c14-076b-42da-8319-48310a964611" ``` @@ -213,3 +213,30 @@ Acceptance criteria: - Dependency graph element counts are non-zero for repositories with facts. - The comparison report makes it easy to judge whether the new result is better than the previous sparse output. + +## Implementation Update + +Implemented the comparison and generation infrastructure needed to rerun the +dataset: +- Added `repo-scoping assess-dataset` to summarize latest runs by facts, + chunks, candidate/approved hierarchy counts, graph coverage, document + presence, and sparse-hierarchy quality issues. +- Updated candidate generation so `SCOPE.md` one-liners and `Provided + Capabilities` blocks seed reviewable current-state abilities/capabilities, + while deterministic fact fallback now requires stronger configuration facts + and does not promote dependency-only repositories. +- Added review-only `INTENT.md`/`SCOPE.md` API and UI draft views. Missing + `INTENT.md` now produces an ambitious draft derived from scope/candidates + without writing the file. +- Added dependency graph fallback nodes/edges for candidate and draft + hierarchies so repos with facts no longer render empty just because approved + characteristics are absent. +- Added transparent quality criteria for template contamination and + scope-vs-intent separation; deterministic gates can require review but do not + accept registry truth. + +The latest local assessment command currently sees nine repositories because +`vantage-point` has been added. It still reports old sparse Railiance candidate +counts because those stored analysis runs predate this implementation. T07 stays +open until the affected repositories are rerun and compared against the sparse +baseline.