From 4706291a03fc73b4a7d214a91268c828446befb9 Mon Sep 17 00:00:00 2001 From: tegwick Date: Fri, 15 May 2026 18:28:25 +0200 Subject: [PATCH] Recover repo-scoping native candidate families --- .../candidate_graph/generator.py | 316 ++++++++++++++++++ tests/test_candidate_graph.py | 89 ++++- ...16-native-candidate-generation-recovery.md | 10 +- 3 files changed, 413 insertions(+), 2 deletions(-) diff --git a/src/repo_registry/candidate_graph/generator.py b/src/repo_registry/candidate_graph/generator.py index 453411f..6a58600 100644 --- a/src/repo_registry/candidate_graph/generator.py +++ b/src/repo_registry/candidate_graph/generator.py @@ -50,6 +50,211 @@ class CandidateAbilityDraft: capabilities: list[CandidateCapabilityDraft] = field(default_factory=list) +REPO_SCOPING_NATIVE_CAPABILITY_SEEDS = [ + { + "name": "Register And Track Repositories", + "primary_class": "ingestion", + "attributes": ["metadata", "git", "analysis-run"], + "features": [ + ( + "Create and update repository records", + "api", + ["src/repo_registry/core/service.py", "src/repo_registry/web_api/app.py"], + ), + ( + "Resolve local or remote Git checkouts", + "backend", + ["src/repo_registry/repo_ingestion/git.py", "tests/test_git_ingestion.py"], + ), + ( + "Import repository metadata", + "backend", + [ + "src/repo_registry/repo_ingestion/metadata.py", + "tests/test_repository_metadata.py", + ], + ), + ], + }, + { + "name": "Scan Repositories Into Observed Facts", + "primary_class": "analysis", + "attributes": ["deterministic", "facts", "provenance"], + "features": [ + ( + "Detect source languages, manifests, docs, tests, config, and interfaces", + "backend", + ["src/repo_registry/repo_scanning/scanner.py", "tests/test_repository_scanner.py"], + ), + ( + "Classify source roles for facts", + "backend", + ["src/repo_registry/repo_scanning/scanner.py", "docs/characteristic-evidence-model.md"], + ), + ( + "Preserve analysis snapshots and fact records", + "storage", + 
["src/repo_registry/storage/sqlite.py", "migrations/0001_initial.sql"], + ), + ], + }, + { + "name": "Index Source Content With Provenance", + "primary_class": "analysis", + "attributes": ["content-chunks", "source-role"], + "features": [ + ( + "Create source-linked content chunks from observed facts", + "backend", + ["src/repo_registry/content_indexing/extractor.py", "tests/test_content_indexing.py"], + ), + ( + "Carry source-role metadata into downstream generation", + "backend", + [ + "src/repo_registry/content_indexing/extractor.py", + "src/repo_registry/llm_extraction/extractor.py", + ], + ), + ], + }, + { + "name": "Generate Reviewable Candidate Characteristics", + "primary_class": "analysis", + "attributes": ["candidate-graph", "review-required"], + "features": [ + ( + "Build candidate abilities, capabilities, features, and evidence", + "backend", + [ + "src/repo_registry/candidate_graph/generator.py", + "src/repo_registry/candidate_graph/normalization.py", + "tests/test_candidate_graph.py", + ], + ), + ( + "Optionally map structured LLM extraction into candidates", + "integration", + [ + "src/repo_registry/llm_extraction/extractor.py", + "src/repo_registry/llm_extraction/mapper.py", + "tests/test_llm_extraction.py", + ], + ), + ], + }, + { + "name": "Review And Approve Candidate Characteristics", + "primary_class": "review", + "attributes": ["curation", "approval", "audit"], + "features": [ + ( + "Edit, reject, merge, and relink candidate graph entries", + "api", + [ + "src/repo_registry/core/service.py", + "src/repo_registry/web_api/app.py", + "tests/test_registry_service.py", + ], + ), + ( + "Publish approved characteristic maps after review", + "storage", + ["src/repo_registry/core/service.py", "src/repo_registry/storage/sqlite.py"], + ), + ( + "Record review decisions and expectation gaps", + "audit", + ["src/repo_registry/core/service.py", "src/repo_registry/web_api/schemas.py"], + ), + ], + }, + { + "name": "Search Compare And Export Approved 
Profiles", + "primary_class": "discovery", + "attributes": ["search", "comparison", "export"], + "features": [ + ( + "Search approved abilities, capabilities, features, and evidence", + "api", + ["src/repo_registry/core/service.py", "tests/test_registry_service.py"], + ), + ( + "Compare repositories and identify capability gaps", + "api", + ["src/repo_registry/core/service.py", "src/repo_registry/web_api/app.py"], + ), + ( + "Export repository profiles", + "api", + ["src/repo_registry/web_api/app.py", "docs/api-contract.md"], + ), + ], + }, + { + "name": "Generate And Maintain SCOPE.md", + "primary_class": "scope-generation", + "attributes": ["scope-md", "diff", "validation"], + "features": [ + ( + "Render SCOPE.md from approved characteristics", + "backend", + [ + "src/repo_registry/scope/generator.py", + "tests/test_scope_generator.py", + "docs/scope-md-spec.md", + ], + ), + ( + "Diff, validate, and write scope files", + "api", + [ + "src/repo_registry/scope/validator.py", + "src/repo_registry/web_api/app.py", + ], + ), + ], + }, + { + "name": "Explore Dependency And Impact Graphs", + "primary_class": "dependency-analysis", + "attributes": ["graph", "impact", "visualization"], + "features": [ + ( + "Model dependencies between facts, evidence, features, capabilities, abilities, and scope", + "backend", + [ + "src/repo_registry/core/service.py", + "docs/dependency-aware-scope-propagation.md", + "docs/dependency-visualization-exploration.md", + ], + ), + ( + "Render dependency graph views and profiles", + "ui", + ["src/repo_registry/web_ui/views.py", "tests/test_web_api.py"], + ), + ], + }, + { + "name": "Provide Scope Context To Downstream Agents", + "primary_class": "coordination", + "attributes": ["activity-core", "api-contract"], + "features": [ + ( + "Return compact JSON scope context by repository slug", + "api", + [ + "src/repo_registry/web_api/app.py", + "docs/schemas/repo-scope-context-response.json", + "tests/test_scope_context_api.py", + ], + ), + ], + }, 
def _repo_scoping_native_capabilities(
    self,
    repository: Repository,
    facts: list[ObservedFact],
    docs: list[ObservedFact],
    tests: list[ObservedFact],
    examples: list[ObservedFact],
) -> list[CandidateCapabilityDraft]:
    """Draft one capability per native repo-scoping seed backed by facts.

    Each entry of ``REPO_SCOPING_NATIVE_CAPABILITY_SEEDS`` lists features as
    ``(name, class, paths)`` tuples. A feature is drafted only when at least
    one observed fact matches one of its paths, and a seed yields a
    capability only when at least one of its features matched.

    Args:
        repository: Repository being analysed; used only for the
            repo-scoping identity check.
        facts: All observed facts available for path matching.
        docs: Documentation facts; those also matched by the seed become
            documentation evidence.
        tests: Test facts; those also matched by the seed become test
            evidence.
        examples: Example facts; those also matched by the seed become
            example evidence.

    Returns:
        Capability drafts in seed order, or an empty list when the
        repository does not look like repo-scoping.
    """
    if not self._looks_like_repo_scoping(repository, facts):
        return []

    capabilities: list[CandidateCapabilityDraft] = []
    for seed in REPO_SCOPING_NATIVE_CAPABILITY_SEEDS:
        feature_drafts: list[CandidateFeatureDraft] = []
        seed_facts: list[ObservedFact] = []
        for feature_name, feature_class, paths in seed["features"]:
            feature_facts = self._facts_for_paths(facts, paths)
            if not feature_facts:
                # No observed file backs this feature; do not invent it.
                continue
            seed_facts.extend(feature_facts)
            feature_drafts.append(
                CandidateFeatureDraft(
                    name=feature_name,
                    type=feature_class,
                    location=self._grouped_location(feature_facts),
                    confidence=0.7,
                    source_refs=self._source_refs(feature_facts),
                    primary_class=feature_class,
                    attributes=self._unique(
                        [feature_class, "source-linked", "repo-owned"]
                    ),
                )
            )

        # Features of one seed may share files, so collapse repeats.
        seed_facts = self._unique_facts(seed_facts)
        if not seed_facts:
            continue

        # Facts are identified by id throughout (see _unique_facts); a set of
        # ids replaces three linear `fact in seed_facts` scans per seed.
        seed_fact_ids = {fact.id for fact in seed_facts}
        seed_doc_facts = [fact for fact in docs if fact.id in seed_fact_ids]
        seed_test_facts = [fact for fact in tests if fact.id in seed_fact_ids]
        seed_example_facts = [
            fact for fact in examples if fact.id in seed_fact_ids
        ]

        seed_name = str(seed["name"])
        capabilities.append(
            CandidateCapabilityDraft(
                name=seed_name,
                description=(
                    "Reviewable native repo-scoping capability inferred "
                    "from owned documentation, source, and tests."
                ),
                inputs=[],
                outputs=[seed_name],
                # Base 0.45 plus weighted flags for each evidence kind found;
                # presumably _confidence sums the weights whose flag is true
                # -- TODO confirm against its definition.
                confidence=self._confidence(
                    0.45,
                    [
                        (0.10, bool(seed_doc_facts)),
                        (0.10, bool(seed_test_facts)),
                        (0.05, bool(seed_example_facts)),
                        (0.05, len(feature_drafts) > 1),
                    ],
                ),
                source_refs=self._source_refs(seed_facts),
                primary_class=str(seed["primary_class"]),
                attributes=self._unique(
                    [*seed["attributes"], "utility-owned", "review-required"]
                ),
                features=feature_drafts,
                evidence=self._evidence(
                    seed_test_facts,
                    seed_example_facts,
                    seed_doc_facts,
                ),
            )
        )
    return capabilities


def _looks_like_repo_scoping(
    self,
    repository: Repository,
    facts: list[ObservedFact],
) -> bool:
    """Return True when the repository appears to be repo-scoping itself.

    The check passes when the lower-cased name, URL, or description mentions
    "repo-scoping" or "repository scoping", or when any fact path lies under
    ``src/repo_registry/``.
    """
    identity = (
        f"{repository.name} {repository.url} {repository.description or ''}"
    ).lower()
    if "repo-scoping" in identity or "repository scoping" in identity:
        return True
    # Fallback for checkouts whose metadata does not carry the project name.
    # NOTE(review): any repository with a src/repo_registry/ tree matches.
    return any(fact.path.startswith("src/repo_registry/") for fact in facts)


def _facts_for_paths(
    self,
    facts: list[ObservedFact],
    paths: list[str],
) -> list[ObservedFact]:
    """Return facts whose path equals, or lies beneath, one of ``paths``.

    Input order is preserved and facts sharing an id are returned once.
    """
    # Hoisted out of the loop: exact matches via a set, directory matches via
    # a single startswith call over a tuple of "<path>/" prefixes.
    exact_paths = set(paths)
    directory_prefixes = tuple(f"{path}/" for path in paths)
    matched = [
        fact
        for fact in facts
        if fact.path in exact_paths or fact.path.startswith(directory_prefixes)
    ]
    return self._unique_facts(matched)


def _unique_facts(self, facts: list[ObservedFact]) -> list[ObservedFact]:
    """Return ``facts`` without repeated ids, keeping first occurrences in order.

    Assumes every fact carries a distinct, hashable ``id``; facts that share
    an id (including a shared ``None``) collapse to the first one seen --
    TODO confirm ids are always populated by the caller.
    """
    first_by_id: dict[int, ObservedFact] = {}
    for fact in facts:
        # setdefault keeps the earliest fact for each id; dicts preserve
        # insertion order, so the result order matches the input order.
        first_by_id.setdefault(fact.id, fact)
    return list(first_by_id.values())
def test_candidate_generator_recovers_repo_scoping_native_candidate_families():
    """Generator emits the nine native repo-scoping capability families.

    The fixture mimics the repo-scoping checkout: owned docs, owned tests,
    and a single route decorator in the web API module.
    """
    repo = Repository(
        id=1,
        name="repo-scoping",
        url="/tmp/repo-scoping",
        description="Maps repositories into reviewable capability graphs.",
        branch="main",
        status="analyzed",
    )

    # Fact ids 1-6: documentation, as (name, path) pairs.
    documentation = [
        ("README", "README.md"),
        ("api-contract.md", "docs/api-contract.md"),
        (
            "characteristic-evidence-model.md",
            "docs/characteristic-evidence-model.md",
        ),
        ("scope-md-spec.md", "docs/scope-md-spec.md"),
        (
            "dependency-aware-scope-propagation.md",
            "docs/dependency-aware-scope-propagation.md",
        ),
        (
            "repo-scope-context-response.json",
            "docs/schemas/repo-scope-context-response.json",
        ),
    ]
    # Fact ids 7-16: test modules, all living directly under tests/.
    test_modules = [
        "test_git_ingestion.py",
        "test_repository_metadata.py",
        "test_repository_scanner.py",
        "test_content_indexing.py",
        "test_candidate_graph.py",
        "test_llm_extraction.py",
        "test_registry_service.py",
        "test_scope_generator.py",
        "test_web_api.py",
        "test_scope_context_api.py",
    ]

    observed = [
        fact(fact_id, "documentation", doc_name, doc_path)
        for fact_id, (doc_name, doc_path) in enumerate(documentation, start=1)
    ]
    observed += [
        fact(fact_id, "test", module, f"tests/{module}")
        for fact_id, module in enumerate(test_modules, start=7)
    ]
    observed.append(
        fact(
            17,
            "interface",
            "python route decorator",
            "src/repo_registry/web_api/app.py",
            '@app.post("/repos")',
        )
    )

    graph = CandidateGraphGenerator().generate(repo, observed)
    drafted = graph[0].capabilities
    drafted_names = {capability.name for capability in drafted}

    expected_families = {
        "Register And Track Repositories",
        "Scan Repositories Into Observed Facts",
        "Index Source Content With Provenance",
        "Generate Reviewable Candidate Characteristics",
        "Review And Approve Candidate Characteristics",
        "Search Compare And Export Approved Profiles",
        "Generate And Maintain SCOPE.md",
        "Explore Dependency And Impact Graphs",
        "Provide Scope Context To Downstream Agents",
    }
    assert expected_families <= drafted_names
    # Provider routing is dependency vocabulary, not a native capability.
    assert "Route LLM Requests Across Providers" not in drafted_names

    scanning = next(
        capability
        for capability in drafted
        if capability.name == "Scan Repositories Into Observed Facts"
    )
    assert scanning.primary_class == "analysis"
    required_attributes = {"deterministic", "facts", "provenance", "utility-owned"}
    assert required_attributes <= set(scanning.attributes)
    # NOTE(review): all() passes vacuously if source_refs is empty; consider
    # also asserting that at least one source ref is present.
    owned_roots = ("docs/", "tests/", "src/")
    assert all(ref.path.startswith(owned_roots) for ref in scanning.source_refs)
+Implementation note 2026-05-15: added repo-scoping native capability seeds +derived from owned path clusters across docs, tests, source, and API/CLI +interfaces. The generator now emits the nine expected repo-scoping candidate +families instead of a single generic interface bucket. A throwaway +self-assessment preview reached `candidate_improvement`: all golden expected +capabilities matched, the provider-routing forbidden capability stayed absent, +and no misplaced API/CLI features were reported. + ## T03: Re-Run Clean Self-Assessment And Compare ```task