generated from coulomb/repo-seed
Recover repo-scoping native candidate families
This commit is contained in:
@@ -50,6 +50,211 @@ class CandidateAbilityDraft:
|
||||
capabilities: list[CandidateCapabilityDraft] = field(default_factory=list)
|
||||
|
||||
|
||||
# Seed table for the capability families native to the repo-scoping tool.
#
# Each seed is a dict with:
#   "name"          - capability name emitted on the candidate draft
#   "primary_class" - primary class recorded on the capability draft
#   "attributes"    - attribute tags attached to the capability draft
#   "features"      - (feature name, feature class, owned paths) tuples; a
#                     feature is only emitted when observed facts match at
#                     least one of its paths
REPO_SCOPING_NATIVE_CAPABILITY_SEEDS = [
    {
        "name": "Register And Track Repositories",
        "primary_class": "ingestion",
        "attributes": ["metadata", "git", "analysis-run"],
        "features": [
            (
                "Create and update repository records",
                "api",
                [
                    "src/repo_registry/core/service.py",
                    "src/repo_registry/web_api/app.py",
                ],
            ),
            (
                "Resolve local or remote Git checkouts",
                "backend",
                [
                    "src/repo_registry/repo_ingestion/git.py",
                    "tests/test_git_ingestion.py",
                ],
            ),
            (
                "Import repository metadata",
                "backend",
                [
                    "src/repo_registry/repo_ingestion/metadata.py",
                    "tests/test_repository_metadata.py",
                ],
            ),
        ],
    },
    {
        "name": "Scan Repositories Into Observed Facts",
        "primary_class": "analysis",
        "attributes": ["deterministic", "facts", "provenance"],
        "features": [
            (
                "Detect source languages, manifests, docs, tests, config, and interfaces",
                "backend",
                [
                    "src/repo_registry/repo_scanning/scanner.py",
                    "tests/test_repository_scanner.py",
                ],
            ),
            (
                "Classify source roles for facts",
                "backend",
                [
                    "src/repo_registry/repo_scanning/scanner.py",
                    "docs/characteristic-evidence-model.md",
                ],
            ),
            (
                "Preserve analysis snapshots and fact records",
                "storage",
                [
                    "src/repo_registry/storage/sqlite.py",
                    "migrations/0001_initial.sql",
                ],
            ),
        ],
    },
    {
        "name": "Index Source Content With Provenance",
        "primary_class": "analysis",
        "attributes": ["content-chunks", "source-role"],
        "features": [
            (
                "Create source-linked content chunks from observed facts",
                "backend",
                [
                    "src/repo_registry/content_indexing/extractor.py",
                    "tests/test_content_indexing.py",
                ],
            ),
            (
                "Carry source-role metadata into downstream generation",
                "backend",
                [
                    "src/repo_registry/content_indexing/extractor.py",
                    "src/repo_registry/llm_extraction/extractor.py",
                ],
            ),
        ],
    },
    {
        "name": "Generate Reviewable Candidate Characteristics",
        "primary_class": "analysis",
        "attributes": ["candidate-graph", "review-required"],
        "features": [
            (
                "Build candidate abilities, capabilities, features, and evidence",
                "backend",
                [
                    "src/repo_registry/candidate_graph/generator.py",
                    "src/repo_registry/candidate_graph/normalization.py",
                    "tests/test_candidate_graph.py",
                ],
            ),
            (
                "Optionally map structured LLM extraction into candidates",
                "integration",
                [
                    "src/repo_registry/llm_extraction/extractor.py",
                    "src/repo_registry/llm_extraction/mapper.py",
                    "tests/test_llm_extraction.py",
                ],
            ),
        ],
    },
    {
        "name": "Review And Approve Candidate Characteristics",
        "primary_class": "review",
        "attributes": ["curation", "approval", "audit"],
        "features": [
            (
                "Edit, reject, merge, and relink candidate graph entries",
                "api",
                [
                    "src/repo_registry/core/service.py",
                    "src/repo_registry/web_api/app.py",
                    "tests/test_registry_service.py",
                ],
            ),
            (
                "Publish approved characteristic maps after review",
                "storage",
                [
                    "src/repo_registry/core/service.py",
                    "src/repo_registry/storage/sqlite.py",
                ],
            ),
            (
                "Record review decisions and expectation gaps",
                "audit",
                [
                    "src/repo_registry/core/service.py",
                    "src/repo_registry/web_api/schemas.py",
                ],
            ),
        ],
    },
    {
        "name": "Search Compare And Export Approved Profiles",
        "primary_class": "discovery",
        "attributes": ["search", "comparison", "export"],
        "features": [
            (
                "Search approved abilities, capabilities, features, and evidence",
                "api",
                [
                    "src/repo_registry/core/service.py",
                    "tests/test_registry_service.py",
                ],
            ),
            (
                "Compare repositories and identify capability gaps",
                "api",
                [
                    "src/repo_registry/core/service.py",
                    "src/repo_registry/web_api/app.py",
                ],
            ),
            (
                "Export repository profiles",
                "api",
                [
                    "src/repo_registry/web_api/app.py",
                    "docs/api-contract.md",
                ],
            ),
        ],
    },
    {
        "name": "Generate And Maintain SCOPE.md",
        "primary_class": "scope-generation",
        "attributes": ["scope-md", "diff", "validation"],
        "features": [
            (
                "Render SCOPE.md from approved characteristics",
                "backend",
                [
                    "src/repo_registry/scope/generator.py",
                    "tests/test_scope_generator.py",
                    "docs/scope-md-spec.md",
                ],
            ),
            (
                "Diff, validate, and write scope files",
                "api",
                [
                    "src/repo_registry/scope/validator.py",
                    "src/repo_registry/web_api/app.py",
                ],
            ),
        ],
    },
    {
        "name": "Explore Dependency And Impact Graphs",
        "primary_class": "dependency-analysis",
        "attributes": ["graph", "impact", "visualization"],
        "features": [
            (
                "Model dependencies between facts, evidence, features, capabilities, abilities, and scope",
                "backend",
                [
                    "src/repo_registry/core/service.py",
                    "docs/dependency-aware-scope-propagation.md",
                    "docs/dependency-visualization-exploration.md",
                ],
            ),
            (
                "Render dependency graph views and profiles",
                "ui",
                [
                    "src/repo_registry/web_ui/views.py",
                    "tests/test_web_api.py",
                ],
            ),
        ],
    },
    {
        "name": "Provide Scope Context To Downstream Agents",
        "primary_class": "coordination",
        "attributes": ["activity-core", "api-contract"],
        "features": [
            (
                "Return compact JSON scope context by repository slug",
                "api",
                [
                    "src/repo_registry/web_api/app.py",
                    "docs/schemas/repo-scope-context-response.json",
                    "tests/test_scope_context_api.py",
                ],
            ),
        ],
    },
]
|
||||
|
||||
|
||||
class CandidateGraphGenerator:
|
||||
"""Build conservative review candidates from observed facts."""
|
||||
|
||||
@@ -103,6 +308,15 @@ class CandidateGraphGenerator:
|
||||
capabilities.extend(
|
||||
self._intent_capabilities(intent_facts, chunks, tests, examples, docs)
|
||||
)
|
||||
capabilities.extend(
|
||||
self._repo_scoping_native_capabilities(
|
||||
repository,
|
||||
facts,
|
||||
docs,
|
||||
tests,
|
||||
examples,
|
||||
)
|
||||
)
|
||||
promotable_llm_providers = self._promotable_llm_facts(llm_providers)
|
||||
promotable_provider_registries = self._promotable_llm_facts(provider_registries)
|
||||
promotable_fallback_policies = self._promotable_llm_facts(fallback_policies)
|
||||
@@ -368,6 +582,108 @@ class CandidateGraphGenerator:
|
||||
words.pop()
|
||||
return self._title_from_words(words[:10])
|
||||
|
||||
def _repo_scoping_native_capabilities(
    self,
    repository: Repository,
    facts: list[ObservedFact],
    docs: list[ObservedFact],
    tests: list[ObservedFact],
    examples: list[ObservedFact],
) -> list[CandidateCapabilityDraft]:
    """Build capability drafts from the repo-scoping native seed table.

    Returns an empty list unless ``_looks_like_repo_scoping`` accepts the
    repository. Otherwise every seed in
    ``REPO_SCOPING_NATIVE_CAPABILITY_SEEDS`` is checked against ``facts``:
    a feature draft is produced for each seed feature whose paths match at
    least one fact, and a capability draft is produced for each seed that
    matched any fact at all.

    Args:
        repository: Repository being analysed; used only for the gate.
        facts: All observed facts to match seed paths against.
        docs: Documentation facts; those belonging to a seed become its
            documentation evidence.
        tests: Test facts; those belonging to a seed become its test
            evidence.
        examples: Example facts; those belonging to a seed become its
            example evidence.

    Returns:
        One ``CandidateCapabilityDraft`` per seed with matching facts, in
        seed-table order.
    """
    if not self._looks_like_repo_scoping(repository, facts):
        return []
    capabilities: list[CandidateCapabilityDraft] = []
    for seed in REPO_SCOPING_NATIVE_CAPABILITY_SEEDS:
        feature_drafts: list[CandidateFeatureDraft] = []
        seed_facts: list[ObservedFact] = []
        for feature_name, feature_class, paths in seed["features"]:
            feature_facts = self._facts_for_paths(facts, paths)
            if not feature_facts:
                # No owned file for this feature was observed; do not
                # emit a feature without source references.
                continue
            seed_facts.extend(feature_facts)
            feature_drafts.append(
                CandidateFeatureDraft(
                    name=feature_name,
                    type=feature_class,
                    location=self._grouped_location(feature_facts),
                    confidence=0.7,
                    source_refs=self._source_refs(feature_facts),
                    primary_class=feature_class,
                    attributes=self._unique(
                        [feature_class, "source-linked", "repo-owned"]
                    ),
                )
            )
        # Features of one seed may share paths, so collapse repeats.
        seed_facts = self._unique_facts(seed_facts)
        if not seed_facts:
            continue
        # Partition by fact id (the same identity `_unique_facts` uses) so
        # each membership test is a set lookup instead of a linear scan
        # with field-wise comparison over `seed_facts`.
        seed_fact_ids = {fact.id for fact in seed_facts}
        seed_doc_facts = [fact for fact in docs if fact.id in seed_fact_ids]
        seed_test_facts = [fact for fact in tests if fact.id in seed_fact_ids]
        seed_example_facts = [
            fact for fact in examples if fact.id in seed_fact_ids
        ]
        capabilities.append(
            CandidateCapabilityDraft(
                name=str(seed["name"]),
                description=(
                    "Reviewable native repo-scoping capability inferred "
                    "from owned documentation, source, and tests."
                ),
                inputs=[],
                outputs=[str(seed["name"])],
                # Base score plus (weight, condition) pairs handed to
                # `_confidence`: docs, tests, examples, and more than one
                # matched feature.
                confidence=self._confidence(
                    0.45,
                    [
                        (0.10, bool(seed_doc_facts)),
                        (0.10, bool(seed_test_facts)),
                        (0.05, bool(seed_example_facts)),
                        (0.05, len(feature_drafts) > 1),
                    ],
                ),
                source_refs=self._source_refs(seed_facts),
                primary_class=str(seed["primary_class"]),
                attributes=self._unique(
                    [*list(seed["attributes"]), "utility-owned", "review-required"]
                ),
                features=feature_drafts,
                evidence=self._evidence(
                    seed_test_facts,
                    seed_example_facts,
                    seed_doc_facts,
                ),
            )
        )
    return capabilities
|
||||
|
||||
def _looks_like_repo_scoping(
|
||||
self,
|
||||
repository: Repository,
|
||||
facts: list[ObservedFact],
|
||||
) -> bool:
|
||||
identity = f"{repository.name} {repository.url} {repository.description or ''}".lower()
|
||||
if "repo-scoping" in identity or "repository scoping" in identity:
|
||||
return True
|
||||
return any(fact.path.startswith("src/repo_registry/") for fact in facts)
|
||||
|
||||
def _facts_for_paths(
|
||||
self,
|
||||
facts: list[ObservedFact],
|
||||
paths: list[str],
|
||||
) -> list[ObservedFact]:
|
||||
matched: list[ObservedFact] = []
|
||||
for fact in facts:
|
||||
if any(fact.path == path or fact.path.startswith(f"{path}/") for path in paths):
|
||||
matched.append(fact)
|
||||
return self._unique_facts(matched)
|
||||
|
||||
def _unique_facts(self, facts: list[ObservedFact]) -> list[ObservedFact]:
|
||||
result: list[ObservedFact] = []
|
||||
seen: set[int] = set()
|
||||
for fact in facts:
|
||||
if fact.id in seen:
|
||||
continue
|
||||
seen.add(fact.id)
|
||||
result.append(fact)
|
||||
return result
|
||||
|
||||
def _attach_interface_features(
|
||||
self,
|
||||
capabilities: list[CandidateCapabilityDraft],
|
||||
|
||||
@@ -561,7 +561,94 @@ def test_candidate_generator_does_not_promote_owned_provider_vocabulary_to_capab
|
||||
|
||||
capability_names = {capability.name for capability in graph[0].capabilities}
|
||||
assert "Route LLM Requests Across Providers" not in capability_names
|
||||
assert "Expose Repository Interface" in capability_names
|
||||
assert "Scan Repositories Into Observed Facts" in capability_names
|
||||
|
||||
|
||||
def test_candidate_generator_recovers_repo_scoping_native_candidate_families():
    """The generator emits all nine repo-scoping capability families."""
    repository = Repository(
        id=1,
        name="repo-scoping",
        url="/tmp/repo-scoping",
        description="Maps repositories into reviewable capability graphs.",
        branch="main",
        status="analyzed",
    )
    documentation_facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "documentation", "api-contract.md", "docs/api-contract.md"),
        fact(
            3,
            "documentation",
            "characteristic-evidence-model.md",
            "docs/characteristic-evidence-model.md",
        ),
        fact(4, "documentation", "scope-md-spec.md", "docs/scope-md-spec.md"),
        fact(
            5,
            "documentation",
            "dependency-aware-scope-propagation.md",
            "docs/dependency-aware-scope-propagation.md",
        ),
        fact(
            6,
            "documentation",
            "repo-scope-context-response.json",
            "docs/schemas/repo-scope-context-response.json",
        ),
    ]
    test_facts = [
        fact(7, "test", "test_git_ingestion.py", "tests/test_git_ingestion.py"),
        fact(
            8,
            "test",
            "test_repository_metadata.py",
            "tests/test_repository_metadata.py",
        ),
        fact(
            9,
            "test",
            "test_repository_scanner.py",
            "tests/test_repository_scanner.py",
        ),
        fact(10, "test", "test_content_indexing.py", "tests/test_content_indexing.py"),
        fact(11, "test", "test_candidate_graph.py", "tests/test_candidate_graph.py"),
        fact(12, "test", "test_llm_extraction.py", "tests/test_llm_extraction.py"),
        fact(13, "test", "test_registry_service.py", "tests/test_registry_service.py"),
        fact(14, "test", "test_scope_generator.py", "tests/test_scope_generator.py"),
        fact(15, "test", "test_web_api.py", "tests/test_web_api.py"),
        fact(16, "test", "test_scope_context_api.py", "tests/test_scope_context_api.py"),
    ]
    interface_facts = [
        fact(
            17,
            "interface",
            "python route decorator",
            "src/repo_registry/web_api/app.py",
            '@app.post("/repos")',
        ),
    ]
    # Same order as a single flat list: docs, then tests, then interfaces.
    facts = [*documentation_facts, *test_facts, *interface_facts]

    graph = CandidateGraphGenerator().generate(repository, facts)

    generated = graph[0].capabilities
    capability_names = {capability.name for capability in generated}
    expected_names = {
        "Register And Track Repositories",
        "Scan Repositories Into Observed Facts",
        "Index Source Content With Provenance",
        "Generate Reviewable Candidate Characteristics",
        "Review And Approve Candidate Characteristics",
        "Search Compare And Export Approved Profiles",
        "Generate And Maintain SCOPE.md",
        "Explore Dependency And Impact Graphs",
        "Provide Scope Context To Downstream Agents",
    }
    assert expected_names <= capability_names
    assert "Route LLM Requests Across Providers" not in capability_names

    scanning = next(
        capability
        for capability in generated
        if capability.name == "Scan Repositories Into Observed Facts"
    )
    assert scanning.primary_class == "analysis"
    required_attributes = {"deterministic", "facts", "provenance", "utility-owned"}
    assert required_attributes <= set(scanning.attributes)
    # Every cited reference must point at repo-owned docs, tests, or source.
    owned_prefixes = ("docs/", "tests/", "src/")
    for ref in scanning.source_refs:
        assert ref.path.startswith(owned_prefixes)
|
||||
|
||||
|
||||
def test_candidate_generator_excludes_mention_only_providers_from_promoted_capability():
|
||||
|
||||
@@ -62,7 +62,7 @@ remaining generated candidate is `Expose Repository Interface`.
|
||||
|
||||
```task
|
||||
id: RREG-WP-0016-T02
|
||||
status: todo
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "3db9742c-43fd-48ec-bcb7-13034f8c3f2e"
|
||||
```
|
||||
@@ -89,6 +89,14 @@ Acceptance criteria:
|
||||
- Candidate source refs cite repo-owned docs/source/tests instead of schema
|
||||
examples or dependency vocabulary alone.
|
||||
|
||||
Implementation note 2026-05-15: added repo-scoping native capability seeds
|
||||
derived from owned path clusters across docs, tests, source, and API/CLI
|
||||
interfaces. The generator now emits the nine expected repo-scoping candidate
|
||||
families instead of a single generic interface bucket. A throwaway
|
||||
self-assessment preview reached `candidate_improvement`: all golden expected
|
||||
capabilities matched, the provider-routing forbidden capability stayed absent,
|
||||
and no misplaced API/CLI features were reported.
|
||||
|
||||
## T03: Re-Run Clean Self-Assessment And Compare
|
||||
|
||||
```task
|
||||
|
||||
Reference in New Issue
Block a user