Recover repo-scoping native candidate families

This commit is contained in:
2026-05-15 18:28:25 +02:00
parent e2f378be90
commit 4706291a03
3 changed files with 413 additions and 2 deletions

View File

@@ -50,6 +50,211 @@ class CandidateAbilityDraft:
capabilities: list[CandidateCapabilityDraft] = field(default_factory=list)
# Seed definitions for the capability families that the generator emits for a
# repository that looks like repo-scoping itself (see
# CandidateGraphGenerator._repo_scoping_native_capabilities).
#
# Each seed is a dict with these keys:
#   "name"          - capability name; also reused as the capability's single
#                     output entry.
#   "primary_class" - primary class assigned to the capability draft.
#   "attributes"    - attribute tags; the generator appends "utility-owned"
#                     and "review-required" to them.
#   "features"      - list of (feature name, feature class, paths) tuples.
#
# A feature is emitted only when at least one observed fact has a path equal
# to one of the listed paths, or located underneath it ("<path>/..."). A seed
# whose features match no facts at all yields no capability.
REPO_SCOPING_NATIVE_CAPABILITY_SEEDS = [
    {
        "name": "Register And Track Repositories",
        "primary_class": "ingestion",
        "attributes": ["metadata", "git", "analysis-run"],
        "features": [
            (
                "Create and update repository records",
                "api",
                ["src/repo_registry/core/service.py", "src/repo_registry/web_api/app.py"],
            ),
            (
                "Resolve local or remote Git checkouts",
                "backend",
                ["src/repo_registry/repo_ingestion/git.py", "tests/test_git_ingestion.py"],
            ),
            (
                "Import repository metadata",
                "backend",
                [
                    "src/repo_registry/repo_ingestion/metadata.py",
                    "tests/test_repository_metadata.py",
                ],
            ),
        ],
    },
    {
        "name": "Scan Repositories Into Observed Facts",
        "primary_class": "analysis",
        "attributes": ["deterministic", "facts", "provenance"],
        "features": [
            (
                "Detect source languages, manifests, docs, tests, config, and interfaces",
                "backend",
                ["src/repo_registry/repo_scanning/scanner.py", "tests/test_repository_scanner.py"],
            ),
            (
                "Classify source roles for facts",
                "backend",
                ["src/repo_registry/repo_scanning/scanner.py", "docs/characteristic-evidence-model.md"],
            ),
            (
                "Preserve analysis snapshots and fact records",
                "storage",
                ["src/repo_registry/storage/sqlite.py", "migrations/0001_initial.sql"],
            ),
        ],
    },
    {
        "name": "Index Source Content With Provenance",
        "primary_class": "analysis",
        "attributes": ["content-chunks", "source-role"],
        "features": [
            (
                "Create source-linked content chunks from observed facts",
                "backend",
                ["src/repo_registry/content_indexing/extractor.py", "tests/test_content_indexing.py"],
            ),
            (
                "Carry source-role metadata into downstream generation",
                "backend",
                [
                    "src/repo_registry/content_indexing/extractor.py",
                    "src/repo_registry/llm_extraction/extractor.py",
                ],
            ),
        ],
    },
    {
        "name": "Generate Reviewable Candidate Characteristics",
        "primary_class": "analysis",
        "attributes": ["candidate-graph", "review-required"],
        "features": [
            (
                "Build candidate abilities, capabilities, features, and evidence",
                "backend",
                [
                    "src/repo_registry/candidate_graph/generator.py",
                    "src/repo_registry/candidate_graph/normalization.py",
                    "tests/test_candidate_graph.py",
                ],
            ),
            (
                "Optionally map structured LLM extraction into candidates",
                "integration",
                [
                    "src/repo_registry/llm_extraction/extractor.py",
                    "src/repo_registry/llm_extraction/mapper.py",
                    "tests/test_llm_extraction.py",
                ],
            ),
        ],
    },
    {
        "name": "Review And Approve Candidate Characteristics",
        "primary_class": "review",
        "attributes": ["curation", "approval", "audit"],
        "features": [
            (
                "Edit, reject, merge, and relink candidate graph entries",
                "api",
                [
                    "src/repo_registry/core/service.py",
                    "src/repo_registry/web_api/app.py",
                    "tests/test_registry_service.py",
                ],
            ),
            (
                "Publish approved characteristic maps after review",
                "storage",
                ["src/repo_registry/core/service.py", "src/repo_registry/storage/sqlite.py"],
            ),
            (
                "Record review decisions and expectation gaps",
                "audit",
                ["src/repo_registry/core/service.py", "src/repo_registry/web_api/schemas.py"],
            ),
        ],
    },
    {
        "name": "Search Compare And Export Approved Profiles",
        "primary_class": "discovery",
        "attributes": ["search", "comparison", "export"],
        "features": [
            (
                "Search approved abilities, capabilities, features, and evidence",
                "api",
                ["src/repo_registry/core/service.py", "tests/test_registry_service.py"],
            ),
            (
                "Compare repositories and identify capability gaps",
                "api",
                ["src/repo_registry/core/service.py", "src/repo_registry/web_api/app.py"],
            ),
            (
                "Export repository profiles",
                "api",
                ["src/repo_registry/web_api/app.py", "docs/api-contract.md"],
            ),
        ],
    },
    {
        "name": "Generate And Maintain SCOPE.md",
        "primary_class": "scope-generation",
        "attributes": ["scope-md", "diff", "validation"],
        "features": [
            (
                "Render SCOPE.md from approved characteristics",
                "backend",
                [
                    "src/repo_registry/scope/generator.py",
                    "tests/test_scope_generator.py",
                    "docs/scope-md-spec.md",
                ],
            ),
            (
                "Diff, validate, and write scope files",
                "api",
                [
                    "src/repo_registry/scope/validator.py",
                    "src/repo_registry/web_api/app.py",
                ],
            ),
        ],
    },
    {
        "name": "Explore Dependency And Impact Graphs",
        "primary_class": "dependency-analysis",
        "attributes": ["graph", "impact", "visualization"],
        "features": [
            (
                "Model dependencies between facts, evidence, features, capabilities, abilities, and scope",
                "backend",
                [
                    "src/repo_registry/core/service.py",
                    "docs/dependency-aware-scope-propagation.md",
                    "docs/dependency-visualization-exploration.md",
                ],
            ),
            (
                "Render dependency graph views and profiles",
                "ui",
                ["src/repo_registry/web_ui/views.py", "tests/test_web_api.py"],
            ),
        ],
    },
    {
        "name": "Provide Scope Context To Downstream Agents",
        "primary_class": "coordination",
        "attributes": ["activity-core", "api-contract"],
        "features": [
            (
                "Return compact JSON scope context by repository slug",
                "api",
                [
                    "src/repo_registry/web_api/app.py",
                    "docs/schemas/repo-scope-context-response.json",
                    "tests/test_scope_context_api.py",
                ],
            ),
        ],
    },
]
class CandidateGraphGenerator:
"""Build conservative review candidates from observed facts."""
@@ -103,6 +308,15 @@ class CandidateGraphGenerator:
capabilities.extend(
self._intent_capabilities(intent_facts, chunks, tests, examples, docs)
)
capabilities.extend(
self._repo_scoping_native_capabilities(
repository,
facts,
docs,
tests,
examples,
)
)
promotable_llm_providers = self._promotable_llm_facts(llm_providers)
promotable_provider_registries = self._promotable_llm_facts(provider_registries)
promotable_fallback_policies = self._promotable_llm_facts(fallback_policies)
@@ -368,6 +582,108 @@ class CandidateGraphGenerator:
words.pop()
return self._title_from_words(words[:10])
def _repo_scoping_native_capabilities(
    self,
    repository: Repository,
    facts: list[ObservedFact],
    docs: list[ObservedFact],
    tests: list[ObservedFact],
    examples: list[ObservedFact],
) -> list[CandidateCapabilityDraft]:
    """Build capability drafts from the repo-scoping native seed table.

    Returns an empty list unless ``_looks_like_repo_scoping`` accepts the
    repository. Otherwise every entry of
    ``REPO_SCOPING_NATIVE_CAPABILITY_SEEDS`` is matched against ``facts``:
    a seed feature becomes a feature draft when at least one fact matches
    its paths, and a seed with no matching facts produces no capability.

    ``docs``, ``tests`` and ``examples`` are narrowed to the facts that
    matched the seed; they feed both the confidence conditions and the
    capability's evidence.
    """
    drafts: list[CandidateCapabilityDraft] = []
    if not self._looks_like_repo_scoping(repository, facts):
        return drafts

    for seed in REPO_SCOPING_NATIVE_CAPABILITY_SEEDS:
        features: list[CandidateFeatureDraft] = []
        matched: list[ObservedFact] = []

        for label, kind, seed_paths in seed["features"]:
            hits = self._facts_for_paths(facts, seed_paths)
            if hits:
                matched.extend(hits)
                location = self._grouped_location(hits)
                feature_refs = self._source_refs(hits)
                feature_tags = self._unique([kind, "source-linked", "repo-owned"])
                features.append(
                    CandidateFeatureDraft(
                        name=label,
                        type=kind,
                        location=location,
                        confidence=0.7,
                        source_refs=feature_refs,
                        primary_class=kind,
                        attributes=feature_tags,
                    )
                )

        matched = self._unique_facts(matched)
        if not matched:
            # None of the seed's paths were observed in this repository.
            continue

        def restrict_to_seed(pool: list[ObservedFact]) -> list[ObservedFact]:
            # Keep only the facts from *pool* that also matched this seed.
            return [item for item in pool if item in matched]

        doc_hits = restrict_to_seed(docs)
        test_hits = restrict_to_seed(tests)
        example_hits = restrict_to_seed(examples)

        title = str(seed["name"])
        # Base score plus (weight, condition) pairs handed to _confidence.
        score = self._confidence(
            0.45,
            [
                (0.10, bool(doc_hits)),
                (0.10, bool(test_hits)),
                (0.05, bool(example_hits)),
                (0.05, len(features) > 1),
            ],
        )
        capability_refs = self._source_refs(matched)
        capability_tags = self._unique(
            list(seed["attributes"]) + ["utility-owned", "review-required"]
        )
        evidence = self._evidence(test_hits, example_hits, doc_hits)

        drafts.append(
            CandidateCapabilityDraft(
                name=title,
                description=(
                    "Reviewable native repo-scoping capability inferred "
                    "from owned documentation, source, and tests."
                ),
                inputs=[],
                outputs=[str(seed["name"])],
                confidence=score,
                source_refs=capability_refs,
                primary_class=str(seed["primary_class"]),
                attributes=capability_tags,
                features=features,
                evidence=evidence,
            )
        )

    return drafts
def _looks_like_repo_scoping(
    self,
    repository: Repository,
    facts: list[ObservedFact],
) -> bool:
    """Return True when the repository appears to be repo-scoping itself.

    The check passes when the lower-cased name, URL and description contain
    "repo-scoping" or "repository scoping", or when any fact path starts
    with "src/repo_registry/".
    """
    description = repository.description or ""
    identity = f"{repository.name} {repository.url} {description}".lower()
    for marker in ("repo-scoping", "repository scoping"):
        if marker in identity:
            return True

    # Fall back to the source layout when the identity text is inconclusive.
    for fact in facts:
        if fact.path.startswith("src/repo_registry/"):
            return True
    return False
def _facts_for_paths(
    self,
    facts: list[ObservedFact],
    paths: list[str],
) -> list[ObservedFact]:
    """Select the facts located at, or underneath, any of ``paths``.

    A fact matches when its path equals a listed path or starts with that
    path followed by "/". The matches are passed through
    ``_unique_facts`` before being returned.
    """

    def sits_under_listed_path(candidate: str) -> bool:
        for root in paths:
            if candidate == root or candidate.startswith(f"{root}/"):
                return True
        return False

    hits = [fact for fact in facts if sits_under_listed_path(fact.path)]
    return self._unique_facts(hits)
def _unique_facts(self, facts: list[ObservedFact]) -> list[ObservedFact]:
    """Drop facts whose ``id`` was already seen, keeping the first of each.

    The relative order of the surviving facts is the order in which their
    ids first appear in ``facts``.
    """
    first_by_id: dict[int, ObservedFact] = {}
    for fact in facts:
        # setdefault keeps the earliest fact for an id and ignores repeats;
        # dict insertion order preserves first-occurrence ordering.
        first_by_id.setdefault(fact.id, fact)
    return list(first_by_id.values())
def _attach_interface_features(
self,
capabilities: list[CandidateCapabilityDraft],

View File

@@ -561,7 +561,94 @@ def test_candidate_generator_does_not_promote_owned_provider_vocabulary_to_capab
capability_names = {capability.name for capability in graph[0].capabilities}
assert "Route LLM Requests Across Providers" not in capability_names
assert "Expose Repository Interface" in capability_names
assert "Scan Repositories Into Observed Facts" in capability_names
def test_candidate_generator_recovers_repo_scoping_native_candidate_families():
    """The generator emits all nine native families for a repo-scoping repo.

    The fixture supplies documentation, test and interface facts only; the
    test then checks the expected capability names, the absence of the
    provider-routing capability, and the class, attributes and source-ref
    paths of the scanning capability.
    """
    repository = Repository(
        id=1,
        name="repo-scoping",
        url="/tmp/repo-scoping",
        description="Maps repositories into reviewable capability graphs.",
        branch="main",
        status="analyzed",
    )
    # One row per observed fact, in the positional order expected by fact().
    fact_rows = [
        (1, "documentation", "README", "README.md"),
        (2, "documentation", "api-contract.md", "docs/api-contract.md"),
        (
            3,
            "documentation",
            "characteristic-evidence-model.md",
            "docs/characteristic-evidence-model.md",
        ),
        (4, "documentation", "scope-md-spec.md", "docs/scope-md-spec.md"),
        (
            5,
            "documentation",
            "dependency-aware-scope-propagation.md",
            "docs/dependency-aware-scope-propagation.md",
        ),
        (
            6,
            "documentation",
            "repo-scope-context-response.json",
            "docs/schemas/repo-scope-context-response.json",
        ),
        (7, "test", "test_git_ingestion.py", "tests/test_git_ingestion.py"),
        (8, "test", "test_repository_metadata.py", "tests/test_repository_metadata.py"),
        (9, "test", "test_repository_scanner.py", "tests/test_repository_scanner.py"),
        (10, "test", "test_content_indexing.py", "tests/test_content_indexing.py"),
        (11, "test", "test_candidate_graph.py", "tests/test_candidate_graph.py"),
        (12, "test", "test_llm_extraction.py", "tests/test_llm_extraction.py"),
        (13, "test", "test_registry_service.py", "tests/test_registry_service.py"),
        (14, "test", "test_scope_generator.py", "tests/test_scope_generator.py"),
        (15, "test", "test_web_api.py", "tests/test_web_api.py"),
        (16, "test", "test_scope_context_api.py", "tests/test_scope_context_api.py"),
        (
            17,
            "interface",
            "python route decorator",
            "src/repo_registry/web_api/app.py",
            '@app.post("/repos")',
        ),
    ]
    facts = [fact(*row) for row in fact_rows]

    graph = CandidateGraphGenerator().generate(repository, facts)
    generated = graph[0].capabilities
    capability_names = {candidate.name for candidate in generated}

    expected_families = {
        "Register And Track Repositories",
        "Scan Repositories Into Observed Facts",
        "Index Source Content With Provenance",
        "Generate Reviewable Candidate Characteristics",
        "Review And Approve Candidate Characteristics",
        "Search Compare And Export Approved Profiles",
        "Generate And Maintain SCOPE.md",
        "Explore Dependency And Impact Graphs",
        "Provide Scope Context To Downstream Agents",
    }
    assert expected_families.issubset(capability_names)
    assert "Route LLM Requests Across Providers" not in capability_names

    scanning = next(
        candidate
        for candidate in generated
        if candidate.name == "Scan Repositories Into Observed Facts"
    )
    assert scanning.primary_class == "analysis"
    required_attributes = {"deterministic", "facts", "provenance", "utility-owned"}
    assert required_attributes.issubset(set(scanning.attributes))
    owned_prefixes = ("docs/", "tests/", "src/")
    assert all(ref.path.startswith(owned_prefixes) for ref in scanning.source_refs)
def test_candidate_generator_excludes_mention_only_providers_from_promoted_capability():

View File

@@ -62,7 +62,7 @@ remaining generated candidate is `Expose Repository Interface`.
```task
id: RREG-WP-0016-T02
status: todo
status: done
priority: high
state_hub_task_id: "3db9742c-43fd-48ec-bcb7-13034f8c3f2e"
```
@@ -89,6 +89,14 @@ Acceptance criteria:
- Candidate source refs cite repo-owned docs/source/tests instead of schema
examples or dependency vocabulary alone.
Implementation note 2026-05-15: added repo-scoping native capability seeds
derived from owned path clusters across docs, tests, source, and API/CLI
interfaces. The generator now emits the nine expected repo-scoping candidate
families instead of a single generic interface bucket. A throwaway
self-assessment preview reached `candidate_improvement`: all golden expected
capabilities matched, the provider-routing forbidden capability stayed absent,
and no misplaced API/CLI features were reported.
## T03: Re-Run Clean Self-Assessment And Compare
```task