generated from coulomb/repo-seed
improved scanner
This commit is contained in:
@@ -58,6 +58,15 @@ new intent file with a clear provenance note. After that bootstrap, the files
|
||||
should diverge naturally: `INTENT.md` remains design intent, while `SCOPE.md`
|
||||
remains generated or curated current scope.
|
||||
|
||||
Provider, dependency, and tooling facts should also carry a utility
|
||||
relationship. A provider mentioned in documentation is usually a `mention`; an
|
||||
environment variable is usually `configure`; a manifest entry is usually
|
||||
`dependency`; implementation code under provider or adapter modules may be
|
||||
`owned` or `adapter`. Candidate generation should promote only relationships
|
||||
that show the repository provides the utility directly or intentionally exposes
|
||||
it as a facade/adapter. Mentions, dependencies, configuration, and tooling are
|
||||
context until a curator promotes them or stronger owned evidence appears.
|
||||
|
||||
Source references point from interpreted claims back to files or facts.
|
||||
|
||||
Evidence is support for a characteristic. It is not the same thing as an observed
|
||||
|
||||
@@ -56,6 +56,10 @@ normalization.
|
||||
`intent_summary`, `derived_scope`, `product_documentation`,
|
||||
`implementation_source`, `dependency_declaration`, `configuration`,
|
||||
`ci_tooling`, `test_evidence`, or `agent_guidance`.
|
||||
- Utility relationship: metadata describing how a fact relates to repository
|
||||
utility, such as `owned`, `facade`, `adapter`, `configure`, `dependency`,
|
||||
`tooling`, or `mention`. Only owned/facade/adapter relationships should be
|
||||
promoted directly into provided capabilities.
|
||||
- Candidate: proposed characteristic or evidence from deterministic heuristics
|
||||
or optional LLM assistance. Candidates are review inputs, not registry truth.
|
||||
- Approved: curated registry truth that appears in ability maps, search, exports,
|
||||
|
||||
@@ -103,7 +103,10 @@ class CandidateGraphGenerator:
|
||||
capabilities.append(
|
||||
self._interface_capability(interfaces, tests, examples, docs, chunks)
|
||||
)
|
||||
if llm_providers or provider_registries or fallback_policies:
|
||||
promotable_llm_facts = self._promotable_llm_facts(
|
||||
llm_providers + provider_registries + fallback_policies
|
||||
)
|
||||
if promotable_llm_facts:
|
||||
capabilities.append(
|
||||
self._llm_provider_capability(
|
||||
llm_providers,
|
||||
@@ -269,6 +272,8 @@ class CandidateGraphGenerator:
|
||||
credentials,
|
||||
registries,
|
||||
fallback_policies,
|
||||
) + self._utility_relationship_attributes(
|
||||
providers + credentials + registries + fallback_policies
|
||||
),
|
||||
features=features,
|
||||
evidence=self._evidence(tests, examples, docs),
|
||||
@@ -761,6 +766,37 @@ class CandidateGraphGenerator:
|
||||
def _facts(self, facts: list[ObservedFact], kind: str) -> list[ObservedFact]:
|
||||
return [fact for fact in facts if fact.kind == kind]
|
||||
|
||||
def _promotable_llm_facts(self, facts: list[ObservedFact]) -> list[ObservedFact]:
|
||||
return [
|
||||
fact
|
||||
for fact in facts
|
||||
if self._utility_relationship(fact) in {"owned", "facade", "adapter"}
|
||||
]
|
||||
|
||||
def _utility_relationship(self, fact: ObservedFact) -> str:
|
||||
relationship = fact.metadata.get("utility_relationship")
|
||||
if isinstance(relationship, str) and relationship:
|
||||
return relationship
|
||||
source_role = fact.metadata.get("source_role")
|
||||
if source_role == "implementation_source":
|
||||
lower_path = fact.path.lower()
|
||||
if "adapter" in lower_path or "provider" in lower_path:
|
||||
return "adapter"
|
||||
return "owned"
|
||||
if source_role == "configuration":
|
||||
return "configure"
|
||||
if source_role == "dependency_declaration":
|
||||
return "dependency"
|
||||
if source_role in {"agent_guidance", "ci_tooling"}:
|
||||
return "tooling"
|
||||
if not source_role and fact.path.lower().endswith((".py", ".ts", ".js")):
|
||||
return "owned"
|
||||
return "mention"
|
||||
|
||||
def _utility_relationship_attributes(self, facts: list[ObservedFact]) -> list[str]:
|
||||
relationships = sorted({self._utility_relationship(fact) for fact in facts})
|
||||
return [f"utility-{relationship}" for relationship in relationships]
|
||||
|
||||
def _source_refs(self, facts: list[ObservedFact]) -> list[SourceReference]:
|
||||
return [
|
||||
SourceReference(
|
||||
|
||||
@@ -358,6 +358,10 @@ class DeterministicScanner:
|
||||
source_role = self._source_role(relative)
|
||||
if source_role == "agent_guidance":
|
||||
continue
|
||||
utility_relationship = self._provider_utility_relationship(
|
||||
source_role,
|
||||
relative,
|
||||
)
|
||||
for needle, provider in LLM_PROVIDER_HINTS.items():
|
||||
if not self._has_provider_signal(lower_text, needle):
|
||||
continue
|
||||
@@ -372,6 +376,7 @@ class DeterministicScanner:
|
||||
metadata={
|
||||
"source": "provider_hint",
|
||||
"source_role": source_role,
|
||||
"utility_relationship": utility_relationship,
|
||||
},
|
||||
),
|
||||
)
|
||||
@@ -389,6 +394,7 @@ class DeterministicScanner:
|
||||
metadata={
|
||||
"source": "environment_variable",
|
||||
"source_role": source_role,
|
||||
"utility_relationship": "configure",
|
||||
},
|
||||
),
|
||||
)
|
||||
@@ -412,6 +418,7 @@ class DeterministicScanner:
|
||||
metadata={
|
||||
"source": "provider_registry_hint",
|
||||
"source_role": source_role,
|
||||
"utility_relationship": utility_relationship,
|
||||
},
|
||||
),
|
||||
)
|
||||
@@ -429,11 +436,30 @@ class DeterministicScanner:
|
||||
metadata={
|
||||
"source": "fallback_hint",
|
||||
"source_role": source_role,
|
||||
"utility_relationship": utility_relationship,
|
||||
},
|
||||
),
|
||||
)
|
||||
return facts
|
||||
|
||||
def _provider_utility_relationship(
|
||||
self,
|
||||
source_role: str,
|
||||
relative_path: str,
|
||||
) -> str:
|
||||
if source_role == "implementation_source":
|
||||
lower = relative_path.lower()
|
||||
if "adapter" in lower or "provider" in lower:
|
||||
return "adapter"
|
||||
return "owned"
|
||||
if source_role == "configuration":
|
||||
return "configure"
|
||||
if source_role == "dependency_declaration":
|
||||
return "dependency"
|
||||
if source_role in {"ci_tooling", "agent_guidance"}:
|
||||
return "tooling"
|
||||
return "mention"
|
||||
|
||||
def _source_role(self, relative_path: str) -> str:
|
||||
lower = relative_path.lower()
|
||||
parts = lower.split("/")
|
||||
|
||||
@@ -2,7 +2,7 @@ from repo_registry.candidate_graph.generator import CandidateGraphGenerator
|
||||
from repo_registry.core.models import ContentChunk, ObservedFact, Repository
|
||||
|
||||
|
||||
def fact(id, kind, name, path="", value=""):
|
||||
def fact(id, kind, name, path="", value="", metadata=None):
|
||||
return ObservedFact(
|
||||
id=id,
|
||||
repository_id=1,
|
||||
@@ -12,7 +12,7 @@ def fact(id, kind, name, path="", value=""):
|
||||
path=path,
|
||||
name=name,
|
||||
value=value,
|
||||
metadata={},
|
||||
metadata=metadata or {},
|
||||
)
|
||||
|
||||
|
||||
@@ -310,11 +310,44 @@ def test_candidate_generator_maps_llm_provider_facts_to_capability():
|
||||
)
|
||||
facts = [
|
||||
fact(1, "documentation", "README", "README.md"),
|
||||
fact(2, "llm_provider", "OpenRouter", "providers.py", "openrouter"),
|
||||
fact(3, "llm_provider", "Claude", "providers.py", "claude"),
|
||||
fact(4, "credential_config", "OpenRouter API key", ".env.example", "OPENROUTER_API_KEY"),
|
||||
fact(5, "provider_registry", "LLM provider registry", "providers.py"),
|
||||
fact(6, "fallback_policy", "LLM provider fallback policy", "providers.py"),
|
||||
fact(
|
||||
2,
|
||||
"llm_provider",
|
||||
"OpenRouter",
|
||||
"providers.py",
|
||||
"openrouter",
|
||||
{"source_role": "implementation_source", "utility_relationship": "adapter"},
|
||||
),
|
||||
fact(
|
||||
3,
|
||||
"llm_provider",
|
||||
"Claude",
|
||||
"providers.py",
|
||||
"claude",
|
||||
{"source_role": "implementation_source", "utility_relationship": "adapter"},
|
||||
),
|
||||
fact(
|
||||
4,
|
||||
"credential_config",
|
||||
"OpenRouter API key",
|
||||
".env.example",
|
||||
"OPENROUTER_API_KEY",
|
||||
{"source_role": "configuration", "utility_relationship": "configure"},
|
||||
),
|
||||
fact(
|
||||
5,
|
||||
"provider_registry",
|
||||
"LLM provider registry",
|
||||
"providers.py",
|
||||
metadata={"source_role": "implementation_source", "utility_relationship": "adapter"},
|
||||
),
|
||||
fact(
|
||||
6,
|
||||
"fallback_policy",
|
||||
"LLM provider fallback policy",
|
||||
"providers.py",
|
||||
metadata={"source_role": "implementation_source", "utility_relationship": "adapter"},
|
||||
),
|
||||
]
|
||||
|
||||
graph = CandidateGraphGenerator().generate(repository, facts)
|
||||
@@ -329,6 +362,7 @@ def test_candidate_generator_maps_llm_provider_facts_to_capability():
|
||||
assert {"llm-provider", "openrouter", "claude", "fallback-policy"} <= set(
|
||||
capability.attributes
|
||||
)
|
||||
assert {"utility-adapter", "utility-configure"} <= set(capability.attributes)
|
||||
feature_names = {feature.name for feature in capability.features}
|
||||
assert {"Use OpenRouter Models", "Use Claude Models"} <= feature_names
|
||||
assert "Configure LLM Provider Credentials" in feature_names
|
||||
@@ -338,4 +372,34 @@ def test_candidate_generator_maps_llm_provider_facts_to_capability():
|
||||
feature for feature in capability.features if feature.name == "Use OpenRouter Models"
|
||||
)
|
||||
assert openrouter_feature.primary_class == "integration"
|
||||
|
||||
|
||||
def test_candidate_generator_does_not_promote_llm_provider_mentions_to_capability():
    """A provider fact marked as a documentation mention yields no routing capability."""
    repository = Repository(
        id=1,
        name="MentionOnly",
        url="/tmp/mention-only",
        description=None,
        branch="main",
        status="analyzed",
    )
    readme_fact = fact(1, "documentation", "README", "README.md")
    # The only provider evidence is a README mention, tagged as such.
    claude_mention = fact(
        2,
        "llm_provider",
        "Claude",
        "README.md",
        "claude",
        {"source_role": "product_documentation", "utility_relationship": "mention"},
    )

    graph = CandidateGraphGenerator().generate(repository, [readme_fact, claude_mention])

    routing_capabilities = []
    for capability in graph[0].capabilities:
        if capability.name == "Route LLM Requests Across Providers":
            routing_capabilities.append(capability.name)
    assert routing_capabilities == []
|
||||
assert {"llm-provider", "openrouter"} <= set(openrouter_feature.attributes)
|
||||
|
||||
@@ -142,6 +142,20 @@ def test_scanner_records_llm_provider_and_fallback_facts(tmp_path):
|
||||
assert ("provider_registry", "LLM provider registry", "providers.py") in facts
|
||||
assert ("fallback_policy", "LLM provider fallback policy", "README.md") in facts
|
||||
|
||||
by_key = {(fact.kind, fact.name, fact.path): fact for fact in result.facts}
|
||||
assert by_key[("llm_provider", "OpenRouter", "README.md")].metadata[
|
||||
"utility_relationship"
|
||||
] == "mention"
|
||||
assert by_key[("llm_provider", "OpenRouter", "providers.py")].metadata[
|
||||
"utility_relationship"
|
||||
] == "adapter"
|
||||
assert by_key[("credential_config", "OpenRouter API key", ".env.example")].metadata[
|
||||
"utility_relationship"
|
||||
] == "configure"
|
||||
assert by_key[("provider_registry", "LLM provider registry", "providers.py")].metadata[
|
||||
"utility_relationship"
|
||||
] == "adapter"
|
||||
|
||||
|
||||
def test_scanner_does_not_treat_agent_guidance_as_llm_provider(tmp_path):
|
||||
repo = tmp_path / "key-cape-like"
|
||||
|
||||
@@ -98,7 +98,7 @@ Acceptance criteria:
|
||||
|
||||
```task
|
||||
id: RREG-WP-0009-T03
|
||||
status: todo
|
||||
status: in_progress
|
||||
priority: high
|
||||
state_hub_task_id: "3b8bac53-6a14-43b3-9a59-e15c24c0cd6e"
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user