diff --git a/docs/characteristic-evidence-model.md b/docs/characteristic-evidence-model.md index 90f65f7..1235209 100644 --- a/docs/characteristic-evidence-model.md +++ b/docs/characteristic-evidence-model.md @@ -58,6 +58,15 @@ new intent file with a clear provenance note. After that bootstrap, the files should diverge naturally: `INTENT.md` remains design intent, while `SCOPE.md` remains generated or curated current scope. +Provider, dependency, and tooling facts should also carry a utility +relationship. A provider mentioned in documentation is usually a `mention`; an +environment variable is usually `configure`; a manifest entry is usually +`dependency`; implementation code under provider or adapter modules may be +`owned` or `adapter`. Candidate generation should promote only relationships +that show the repository provides the utility directly or intentionally exposes +it as a facade/adapter. Mentions, dependencies, configuration, and tooling are +context until a curator promotes them or stronger owned evidence appears. + Source references point from interpreted claims back to files or facts. Evidence is support for a characteristic. It is not the same thing as an observed diff --git a/docs/terminology.md b/docs/terminology.md index a19f1a4..2be0cd8 100644 --- a/docs/terminology.md +++ b/docs/terminology.md @@ -56,6 +56,10 @@ normalization. `intent_summary`, `derived_scope`, `product_documentation`, `implementation_source`, `dependency_declaration`, `configuration`, `ci_tooling`, `test_evidence`, or `agent_guidance`. +- Utility relationship: metadata describing how a fact relates to repository + utility, such as `owned`, `facade`, `adapter`, `configure`, `dependency`, + `tooling`, or `mention`. Only owned/facade/adapter relationships should be + promoted directly into provided capabilities. - Candidate: proposed characteristic or evidence from deterministic heuristics or optional LLM assistance. Candidates are review inputs, not registry truth. 
- Approved: curated registry truth that appears in ability maps, search, exports,
diff --git a/src/repo_registry/candidate_graph/generator.py b/src/repo_registry/candidate_graph/generator.py
index 838e6f5..ea2a4d0 100644
--- a/src/repo_registry/candidate_graph/generator.py
+++ b/src/repo_registry/candidate_graph/generator.py
@@ -103,7 +103,10 @@ class CandidateGraphGenerator:
             capabilities.append(
                 self._interface_capability(interfaces, tests, examples, docs, chunks)
             )
-        if llm_providers or provider_registries or fallback_policies:
+        promotable_llm_facts = self._promotable_llm_facts(
+            llm_providers + provider_registries + fallback_policies
+        )
+        if promotable_llm_facts:
             capabilities.append(
                 self._llm_provider_capability(
                     llm_providers,
@@ -269,6 +272,8 @@ class CandidateGraphGenerator:
                 credentials,
                 registries,
                 fallback_policies,
+            ) + self._utility_relationship_attributes(
+                providers + credentials + registries + fallback_policies
             ),
             features=features,
             evidence=self._evidence(tests, examples, docs),
@@ -761,6 +766,40 @@ class CandidateGraphGenerator:
     def _facts(self, facts: list[ObservedFact], kind: str) -> list[ObservedFact]:
         return [fact for fact in facts if fact.kind == kind]
 
+    def _promotable_llm_facts(self, facts: list[ObservedFact]) -> list[ObservedFact]:
+        """Return the facts whose utility relationship is owned, facade, or adapter."""
+        return [
+            fact
+            for fact in facts
+            if self._utility_relationship(fact) in {"owned", "facade", "adapter"}
+        ]
+
+    def _utility_relationship(self, fact: ObservedFact) -> str:
+        """Return explicit relationship metadata, else infer one from source role and path."""
+        relationship = fact.metadata.get("utility_relationship")
+        if isinstance(relationship, str) and relationship:
+            return relationship
+        source_role = fact.metadata.get("source_role")
+        if source_role == "implementation_source":
+            lower_path = fact.path.lower()
+            if "adapter" in lower_path or "provider" in lower_path:
+                return "adapter"
+            return "owned"
+        if source_role == "configuration":
+            return "configure"
+        if source_role == "dependency_declaration":
+            return "dependency"
+        if source_role in {"agent_guidance", "ci_tooling"}:
+            return "tooling"
+        if not source_role and fact.path.lower().endswith((".py", ".ts", ".js")):
+            return "owned"
+        return "mention"
+
+    def _utility_relationship_attributes(self, facts: list[ObservedFact]) -> list[str]:
+        """Return sorted, de-duplicated ``utility-<relationship>`` attributes for facts."""
+        relationships = sorted({self._utility_relationship(fact) for fact in facts})
+        return [f"utility-{relationship}" for relationship in relationships]
+
     def _source_refs(self, facts: list[ObservedFact]) -> list[SourceReference]:
         return [
             SourceReference(
diff --git a/src/repo_registry/repo_scanning/scanner.py b/src/repo_registry/repo_scanning/scanner.py
index 4156a51..90abbe0 100644
--- a/src/repo_registry/repo_scanning/scanner.py
+++ b/src/repo_registry/repo_scanning/scanner.py
@@ -358,6 +358,10 @@ class DeterministicScanner:
             source_role = self._source_role(relative)
             if source_role == "agent_guidance":
                 continue
+            utility_relationship = self._provider_utility_relationship(
+                source_role,
+                relative,
+            )
             for needle, provider in LLM_PROVIDER_HINTS.items():
                 if not self._has_provider_signal(lower_text, needle):
                     continue
@@ -372,6 +376,7 @@ class DeterministicScanner:
                         metadata={
                             "source": "provider_hint",
                             "source_role": source_role,
+                            "utility_relationship": utility_relationship,
                         },
                     ),
                 )
@@ -389,6 +394,7 @@ class DeterministicScanner:
                         metadata={
                             "source": "environment_variable",
                             "source_role": source_role,
+                            "utility_relationship": "configure",
                         },
                     ),
                 )
@@ -412,6 +418,7 @@ class DeterministicScanner:
                         metadata={
                             "source": "provider_registry_hint",
                             "source_role": source_role,
+                            "utility_relationship": utility_relationship,
                         },
                     ),
                 )
@@ -429,11 +436,31 @@ class DeterministicScanner:
                         metadata={
                             "source": "fallback_hint",
                             "source_role": source_role,
+                            "utility_relationship": utility_relationship,
                         },
                     ),
                 )
         return facts
 
+    def _provider_utility_relationship(
+        self,
+        source_role: str,
+        relative_path: str,
+    ) -> str:
+        """Classify a provider fact's utility relationship from its source role and path."""
+        if source_role == "implementation_source":
+            lower = relative_path.lower()
+            if "adapter" in lower or "provider" in lower:
+                return "adapter"
+            return "owned"
+        if source_role == "configuration":
+            return "configure"
+        if source_role == "dependency_declaration":
+            return "dependency"
+        if source_role in {"ci_tooling", "agent_guidance"}:
+            return "tooling"
+        return "mention"
+
     def _source_role(self, relative_path: str) -> str:
         lower = relative_path.lower()
         parts = lower.split("/")
diff --git a/tests/test_candidate_graph.py b/tests/test_candidate_graph.py
index 0d0305b..1f172ef 100644
--- a/tests/test_candidate_graph.py
+++ b/tests/test_candidate_graph.py
@@ -2,7 +2,7 @@ from repo_registry.candidate_graph.generator import CandidateGraphGenerator
 from repo_registry.core.models import ContentChunk, ObservedFact, Repository
 
 
-def fact(id, kind, name, path="", value=""):
+def fact(id, kind, name, path="", value="", metadata=None):
     return ObservedFact(
         id=id,
         repository_id=1,
@@ -12,7 +12,7 @@ def fact(id, kind, name, path="", value=""):
         path=path,
         name=name,
         value=value,
-        metadata={},
+        metadata=metadata or {},
     )
 
 
@@ -310,11 +310,44 @@ def test_candidate_generator_maps_llm_provider_facts_to_capability():
     )
     facts = [
         fact(1, "documentation", "README", "README.md"),
-        fact(2, "llm_provider", "OpenRouter", "providers.py", "openrouter"),
-        fact(3, "llm_provider", "Claude", "providers.py", "claude"),
-        fact(4, "credential_config", "OpenRouter API key", ".env.example", "OPENROUTER_API_KEY"),
-        fact(5, "provider_registry", "LLM provider registry", "providers.py"),
-        fact(6, "fallback_policy", "LLM provider fallback policy", "providers.py"),
+        fact(
+            2,
+            "llm_provider",
+            "OpenRouter",
+            "providers.py",
+            "openrouter",
+            {"source_role": "implementation_source", "utility_relationship": "adapter"},
+        ),
+        fact(
+            3,
+            "llm_provider",
+            "Claude",
+            "providers.py",
+            "claude",
+            {"source_role": "implementation_source", "utility_relationship": "adapter"},
+        ),
+        fact(
+            4,
+            "credential_config",
+            "OpenRouter API key",
+            ".env.example",
+            "OPENROUTER_API_KEY",
+            {"source_role": "configuration", "utility_relationship": "configure"},
+        ),
+        fact(
+            5,
+            "provider_registry",
+            "LLM provider registry",
+            "providers.py",
+            metadata={"source_role": "implementation_source", "utility_relationship": "adapter"},
+        ),
+        fact(
+            6,
+            "fallback_policy",
+            "LLM provider fallback policy",
+            "providers.py",
+            metadata={"source_role": "implementation_source", "utility_relationship": "adapter"},
+        ),
     ]
 
     graph = CandidateGraphGenerator().generate(repository, facts)
@@ -329,6 +362,7 @@ def test_candidate_generator_maps_llm_provider_facts_to_capability():
     assert {"llm-provider", "openrouter", "claude", "fallback-policy"} <= set(
         capability.attributes
     )
+    assert {"utility-adapter", "utility-configure"} <= set(capability.attributes)
     feature_names = {feature.name for feature in capability.features}
     assert {"Use OpenRouter Models", "Use Claude Models"} <= feature_names
     assert "Configure LLM Provider Credentials" in feature_names
@@ -338,4 +372,34 @@ def test_candidate_generator_maps_llm_provider_facts_to_capability():
         feature for feature in capability.features if feature.name == "Use OpenRouter Models"
     )
     assert openrouter_feature.primary_class == "integration"
     assert {"llm-provider", "openrouter"} <= set(openrouter_feature.attributes)
+
+
+def test_candidate_generator_does_not_promote_llm_provider_mentions_to_capability():
+    repository = Repository(
+        id=1,
+        name="MentionOnly",
+        url="/tmp/mention-only",
+        description=None,
+        branch="main",
+        status="analyzed",
+    )
+    facts = [
+        fact(1, "documentation", "README", "README.md"),
+        fact(
+            2,
+            "llm_provider",
+            "Claude",
+            "README.md",
+            "claude",
+            {"source_role": "product_documentation", "utility_relationship": "mention"},
+        ),
+    ]
+
+    graph = CandidateGraphGenerator().generate(repository, facts)
+
+    assert [
+        capability.name
+        for capability in graph[0].capabilities
+        if capability.name == "Route LLM Requests Across Providers"
+    ] == []
diff --git a/tests/test_repository_scanner.py b/tests/test_repository_scanner.py
index 3b44228..5a06cd8 100644
--- a/tests/test_repository_scanner.py
+++ b/tests/test_repository_scanner.py
@@ -142,6 +142,20 @@ def test_scanner_records_llm_provider_and_fallback_facts(tmp_path):
     assert ("provider_registry", "LLM provider registry", "providers.py") in facts
     assert ("fallback_policy", "LLM provider fallback policy", "README.md") in facts
 
+    by_key = {(fact.kind, fact.name, fact.path): fact for fact in result.facts}
+    assert by_key[("llm_provider", "OpenRouter", "README.md")].metadata[
+        "utility_relationship"
+    ] == "mention"
+    assert by_key[("llm_provider", "OpenRouter", "providers.py")].metadata[
+        "utility_relationship"
+    ] == "adapter"
+    assert by_key[("credential_config", "OpenRouter API key", ".env.example")].metadata[
+        "utility_relationship"
+    ] == "configure"
+    assert by_key[("provider_registry", "LLM provider registry", "providers.py")].metadata[
+        "utility_relationship"
+    ] == "adapter"
+
 
 def test_scanner_does_not_treat_agent_guidance_as_llm_provider(tmp_path):
     repo = tmp_path / "key-cape-like"
diff --git a/workplans/RREG-WP-0009-provenance-aware-characteristic-rebuild.md b/workplans/RREG-WP-0009-provenance-aware-characteristic-rebuild.md
index 7adcf94..6b8ef6d 100644
--- a/workplans/RREG-WP-0009-provenance-aware-characteristic-rebuild.md
+++ b/workplans/RREG-WP-0009-provenance-aware-characteristic-rebuild.md
@@ -98,7 +98,7 @@ Acceptance criteria:
 
 ```task
 id: RREG-WP-0009-T03
-status: todo
+status: in_progress
 priority: high
 state_hub_task_id: "3b8bac53-6a14-43b3-9a59-e15c24c0cd6e"
 ```